In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score 
from sklearn.metrics import recall_score 
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score 

In [2]:
def helper(df):
    """Return the feature matrix of ``df`` as a NumPy array.

    The first two columns of ``df`` are discarded and every remaining column
    is kept, in its original order.

    The selection is purely positional. The previous implementation first
    swapped columns 0 and 1 by label, which had no effect on the result
    (both columns are dropped) and duplicated columns whenever column names
    were not unique.

    Args:
        df: pandas DataFrame whose columns from position 2 onward are the
            features.

    Returns:
        numpy.ndarray of shape ``(len(df), max(n_columns - 2, 0))``.
    """
    # Convert the whole frame before slicing so the resulting dtype is the
    # common dtype of *all* columns, exactly as before.
    return np.array(df)[:, 2:]

In [8]:
# Number of input features per sample; inference() uses it as the width of
# its input placeholder (shape [None, dimension]).
dimension = 50
def reloadGraph(modelPath):
    """Restore a saved TensorFlow (1.x API) model into a fresh default graph.

    The meta graph is read from ``<modelPath>/<name>.ckpt.meta`` where
    ``<name>`` is the last component of ``modelPath``; variables are restored
    from the latest checkpoint found in ``modelPath``.

    Args:
        modelPath: Path of the model directory (a trailing separator is
            tolerated).

    Returns:
        A ``(graph, sess)`` tuple: the default graph holding the imported
        meta graph, and an open session with the variables restored. The
        caller is responsible for closing ``sess``.

    Raises:
        ValueError: If ``modelPath`` contains no checkpoint.
        Exception: Any error raised by TensorFlow while importing or
            restoring is propagated after the session has been closed.
    """
    tf.reset_default_graph()
    sess = tf.Session()
    try:
        # normpath drops a trailing separator, so basename is never empty
        # (``'a/b/'.split('/')[-1]`` would give '').
        modelName = os.path.basename(os.path.normpath(modelPath))
        metaFile = modelName + '.ckpt.meta'
        saver = tf.train.import_meta_graph(os.path.join(modelPath, metaFile))

        checkpoint = tf.train.latest_checkpoint(modelPath)
        if checkpoint is None:
            raise ValueError('No checkpoint found in {}'.format(modelPath))
        saver.restore(sess, checkpoint)
    except Exception:
        # Do not leak the session when loading fails part-way.
        sess.close()
        raise

    graph = tf.get_default_graph()
    return graph, sess

def inference(graph, loaded_sess, inputX):
    """Run the restored two-layer network on ``inputX``.

    Looks up the tensors ``fc_l1_doc/{weights,biases}:0`` and
    ``fc_l2_doc/{weights,biases}:0`` in ``graph`` and evaluates
    ``sigmoid(sigmoid(X @ w1 + b1) @ w2 + b2)``.

    Args:
        graph: Graph that contains the four tensors named above.
        loaded_sess: Session used to evaluate the result. It is not closed
            here.
        inputX: 2-D array-like with ``dimension`` columns (module-level
            constant), fed to a float32 placeholder.

    Returns:
        The value ``loaded_sess.run`` produces for the second sigmoid layer.
    """
    # Build the forward-pass ops explicitly inside ``graph`` rather than in
    # whatever graph happens to be the ambient default; mixing tensors from
    # two different graphs is an error in TensorFlow.
    with graph.as_default():
        X = tf.placeholder(tf.float32, shape=[None, dimension], name='input')
        w1 = graph.get_tensor_by_name('fc_l1_doc/weights:0')
        b1 = graph.get_tensor_by_name('fc_l1_doc/biases:0')
        w2 = graph.get_tensor_by_name('fc_l2_doc/weights:0')
        b2 = graph.get_tensor_by_name('fc_l2_doc/biases:0')
        hidden = tf.nn.sigmoid(tf.matmul(X, w1) + b1)
        embd = tf.nn.sigmoid(tf.matmul(hidden, w2) + b2)

    return loaded_sess.run(embd, feed_dict={X: inputX})

In [10]:
# Maps each model directory name to its test-set scores, stored as
# (accuracy, auc, precision, recall).
metrics = {}

root= '/workspace/Guowei/rll/model'

# The train/test splits do not depend on the model being evaluated, so read
# and preprocess them once instead of once per model directory.
# NOTE(review): helper() drops the first two columns; presumably these include
# the 'mv_fluency' label -- confirm the label is not left among the features.
train = pd.read_csv('../raw_data/train.csv').drop(columns=['id'])
y_tr = train['mv_fluency']
input_tr = helper(train)

test = pd.read_csv('../raw_data/test.csv').drop(columns=['id'])
y_ts = test['mv_fluency']
input_ts = helper(test)

for f in os.listdir(root):
    modelPath = os.path.join(root, f)
    if not os.path.isdir(modelPath):
        # Stray files in the model root are not checkpoints.
        continue
    try:
        graph, session = reloadGraph(modelPath)
        try:
            embd_tr = inference(graph, session, input_tr)
            embd_ts = inference(graph, session, input_ts)
        finally:
            # Release the session even when inference fails.
            session.close()

        # Fit a logistic-regression probe on the embeddings and score it on
        # the held-out split.
        model = LogisticRegression(solver='lbfgs', max_iter=500)
        model.fit(embd_tr, y_tr)
        y_hat_ts = model.predict(embd_ts)
        y_hat_ts_proba = model.predict_proba(embd_ts)
        acc = accuracy_score(y_ts, y_hat_ts)
        prec = precision_score(y_ts, y_hat_ts)
        recall = recall_score(y_ts, y_hat_ts)
        auc = roc_auc_score(y_ts, y_hat_ts_proba[:,1])
        metrics[f] = (acc, auc, prec, recall)
    except Exception as err:
        # Keep evaluating the remaining models, but report what was skipped
        # instead of silently dropping it.
        print('Skipping {}: {!r}'.format(f, err))

INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_512_l2_32_lr_0.05_penalty_0.1_bs_128/RLL_l1_512_l2_32_lr_0.05_penalty_0.1_bs_128.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_128_l2_32_lr_0.05_penalty_10.0_bs_1024/RLL_l1_128_l2_32_lr_0.05_penalty_10.0_bs_1024.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_128_l2_128_lr_0.05_penalty_1.0_bs_256/RLL_l1_128_l2_128_lr_0.05_penalty_1.0_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_512_lr_0.05_penalty_1.0_bs_128/RLL_l1_1024_l2_512_lr_0.05_penalty_1.0_bs_128.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_256_l2_128_lr_0.05_penalty_1.0_bs_1024/RLL_l1_256_l2_128_lr_0.05_penalty_1.0_bs_1024.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_128_l2_128_lr_0.05_penalty_0.1_bs_128/RLL_l1_128_l2_128_lr_0.05_penalty_0.1_bs_128.ckpt
INFO:t

INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_64_lr_0.05_penalty_5.0_bs_128/RLL_l1_1024_l2_64_lr_0.05_penalty_5.0_bs_128.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_128_lr_0.05_penalty_1.0_bs_1024/RLL_l1_1024_l2_128_lr_0.05_penalty_1.0_bs_1024.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_128_l2_64_lr_0.05_penalty_0.1_bs_256/RLL_l1_128_l2_64_lr_0.05_penalty_0.1_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_128_lr_0.05_penalty_5.0_bs_512/RLL_l1_1024_l2_128_lr_0.05_penalty_5.0_bs_512.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_512_l2_256_lr_0.05_penalty_5.0_bs_128/RLL_l1_512_l2_256_lr_0.05_penalty_5.0_bs_128.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_512_l2_64_lr_0.05_penalty_0.1_bs_256/RLL_l1_512_l2_64_lr_0.05_penalty_0.1_bs_256.ckpt
INFO:ten

INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_128_l2_64_lr_0.05_penalty_10.0_bs_512/RLL_l1_128_l2_64_lr_0.05_penalty_10.0_bs_512.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_128_lr_0.05_penalty_0.1_bs_256/RLL_l1_1024_l2_128_lr_0.05_penalty_0.1_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_128_l2_64_lr_0.05_penalty_5.0_bs_512/RLL_l1_128_l2_64_lr_0.05_penalty_5.0_bs_512.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_512_l2_128_lr_0.05_penalty_1.0_bs_256/RLL_l1_512_l2_128_lr_0.05_penalty_1.0_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_512_l2_64_lr_0.05_penalty_1.0_bs_128/RLL_l1_512_l2_64_lr_0.05_penalty_1.0_bs_128.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_256_l2_32_lr_0.05_penalty_10.0_bs_1024/RLL_l1_256_l2_32_lr_0.05_penalty_10.0_bs_1024.ckpt
INFO:tenso

INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_128_l2_64_lr_0.05_penalty_1.0_bs_1024/RLL_l1_128_l2_64_lr_0.05_penalty_1.0_bs_1024.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_32_lr_0.05_penalty_1.0_bs_1024/RLL_l1_1024_l2_32_lr_0.05_penalty_1.0_bs_1024.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_512_l2_32_lr_0.05_penalty_1.0_bs_256/RLL_l1_512_l2_32_lr_0.05_penalty_1.0_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_512_l2_32_lr_0.05_penalty_10.0_bs_512/RLL_l1_512_l2_32_lr_0.05_penalty_10.0_bs_512.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_64_lr_0.05_penalty_10.0_bs_512/RLL_l1_1024_l2_64_lr_0.05_penalty_10.0_bs_512.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_512_l2_128_lr_0.05_penalty_0.1_bs_256/RLL_l1_512_l2_128_lr_0.05_penalty_0.1_bs_256.ckpt
INFO:ten

INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_256_l2_128_lr_0.05_penalty_1.0_bs_256/RLL_l1_256_l2_128_lr_0.05_penalty_1.0_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_512_lr_0.05_penalty_10.0_bs_512/RLL_l1_1024_l2_512_lr_0.05_penalty_10.0_bs_512.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_256_l2_64_lr_0.05_penalty_5.0_bs_256/RLL_l1_256_l2_64_lr_0.05_penalty_5.0_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_128_l2_32_lr_0.05_penalty_1.0_bs_256/RLL_l1_128_l2_32_lr_0.05_penalty_1.0_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_128_lr_0.05_penalty_10.0_bs_256/RLL_l1_1024_l2_128_lr_0.05_penalty_10.0_bs_256.ckpt
INFO:tensorflow:Restoring parameters from /workspace/Guowei/rll/model/RLL_l1_1024_l2_64_lr_0.05_penalty_0.1_bs_128/RLL_l1_1024_l2_64_lr_0.05_penalty_0.1_bs_128.ckpt
INFO:t

In [13]:
# Rank the model settings from best to worst. The score tuples compare
# element by element, so accuracy decides first, then auc, precision, recall.
sorted_metrics = sorted(
    metrics.items(),
    key=lambda entry: entry[1],
    reverse=True,
)
sorted_metrics

[('RLL_l1_64_l2_32_lr_0.05_penalty_5.0_bs_512',
  (0.8764044943820225, 0.9027298850574713, 0.9152542372881356, 0.9)),
 ('RLL_l1_64_l2_64_lr_0.05_penalty_5.0_bs_1024',
  (0.8764044943820225,
   0.9027298850574713,
   0.9083333333333333,
   0.9083333333333333)),
 ('RLL_l1_512_l2_32_lr_0.05_penalty_5.0_bs_512',
  (0.8707865168539326,
   0.9132183908045977,
   0.9008264462809917,
   0.9083333333333333)),
 ('RLL_l1_512_l2_512_lr_0.05_penalty_5.0_bs_1024',
  (0.8707865168539326,
   0.9104885057471264,
   0.8943089430894309,
   0.9166666666666666)),
 ('RLL_l1_512_l2_64_lr_0.05_penalty_0.1_bs_1024',
  (0.8707865168539326,
   0.9086206896551724,
   0.9008264462809917,
   0.9083333333333333)),
 ('RLL_l1_64_l2_64_lr_0.05_penalty_5.0_bs_256',
  (0.8707865168539326, 0.9076149425287356, 0.907563025210084, 0.9)),
 ('RLL_l1_256_l2_128_lr_0.05_penalty_0.1_bs_128',
  (0.8707865168539326,
   0.9070402298850575,
   0.9008264462809917,
   0.9083333333333333)),
 ('RLL_l1_64_l2_64_lr_0.05_penalty_10.0_bs_512

In [16]:
# Split the ranked (model_setting, (accuracy, auc, precision, recall)) pairs
# into one list per table column.
model_lst = [entry[0] for entry in sorted_metrics]
acc_lst = [entry[1][0] for entry in sorted_metrics]
auc_lst = [entry[1][1] for entry in sorted_metrics]
prec_lst = [entry[1][2] for entry in sorted_metrics]
recall_lst = [entry[1][3] for entry in sorted_metrics]

# Assemble the table one column at a time, in display order.
metric_df = pd.DataFrame()
for column_name, column_values in (
    ('model_setting', model_lst),
    ('accuracy', acc_lst),
    ('auc', auc_lst),
    ('precision', prec_lst),
    ('recall', recall_lst),
):
    metric_df[column_name] = column_values

In [17]:
# Show the per-model metrics table (rows keep the ranking of sorted_metrics).
metric_df

Unnamed: 0,model_setting,accuracy,auc,precision,recall
0,RLL_l1_64_l2_32_lr_0.05_penalty_5.0_bs_512,0.876404,0.902730,0.915254,0.900000
1,RLL_l1_64_l2_64_lr_0.05_penalty_5.0_bs_1024,0.876404,0.902730,0.908333,0.908333
2,RLL_l1_512_l2_32_lr_0.05_penalty_5.0_bs_512,0.870787,0.913218,0.900826,0.908333
3,RLL_l1_512_l2_512_lr_0.05_penalty_5.0_bs_1024,0.870787,0.910489,0.894309,0.916667
4,RLL_l1_512_l2_64_lr_0.05_penalty_0.1_bs_1024,0.870787,0.908621,0.900826,0.908333
5,RLL_l1_64_l2_64_lr_0.05_penalty_5.0_bs_256,0.870787,0.907615,0.907563,0.900000
6,RLL_l1_256_l2_128_lr_0.05_penalty_0.1_bs_128,0.870787,0.907040,0.900826,0.908333
7,RLL_l1_64_l2_64_lr_0.05_penalty_10.0_bs_512,0.870787,0.905891,0.907563,0.900000
8,RLL_l1_256_l2_32_lr_0.05_penalty_10.0_bs_1024,0.870787,0.905603,0.907563,0.900000
9,RLL_l1_128_l2_32_lr_0.05_penalty_10.0_bs_1024,0.870787,0.905603,0.900826,0.908333


# Logistic-regression baseline: raw features + majority-vote labels

In [21]:
# Load both splits; the 'id' column carries no signal and is dropped.
train = pd.read_csv('../raw_data/train.csv').drop(columns=['id'])
test = pd.read_csv('../raw_data/test.csv').drop(columns=['id'])

# Labels come from the 'mv_fluency' column, features from helper().
y_tr, y_ts = train['mv_fluency'], test['mv_fluency']
input_tr, input_ts = helper(train), helper(test)

# Fit logistic regression directly on the raw features.
model = LogisticRegression(solver='lbfgs', max_iter=2000)
model.fit(input_tr, y_tr)

# Hard predictions feed accuracy/precision/recall; the second probability
# column feeds the AUC.
y_hat_ts = model.predict(input_ts)
y_hat_ts_proba = model.predict_proba(input_ts)

acc = accuracy_score(y_ts, y_hat_ts)
prec = precision_score(y_ts, y_hat_ts)
recall = recall_score(y_ts, y_hat_ts)
auc = roc_auc_score(y_ts, y_hat_ts_proba[:,1])
print(acc, auc, prec, recall)

0.8146067415730337 0.9033045977011493 0.8372093023255814 0.9




In [None]:
#lr = LogisticRegression(solver='lbfgs', max_iter=500)
#lr = LogisticRegression()

In [None]:
#lr = LogisticRegression(solver='lbfgs', max_iter=500)
#lr = LogisticRegression()
