# Evaluation Using Eli5 and Lime

In [1]:
# import packages
import numpy as np
import pandas as pd

# word2vec pipeline
from sklearn.pipeline import make_pipeline
from word2vec import get_embed_features_list
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# eli5
import eli5
from eli5.lime import TextExplainer



In [2]:
# define custom transformer to embed words
class FeatureEmbedder(BaseEstimator, TransformerMixin):
    """Stateless scikit-learn transformer that wraps ``get_embed_features_list``.

    Placing the embedding step at the front of a pipeline lets the downstream
    estimator be fitted and queried with the raw inputs directly.
    """

    def __init__(self):
        # No hyper-parameters to store.
        pass

    def fit(self, X, y=None):
        """Learn nothing and return ``self`` so the step can be chained."""
        return self

    def transform(self, X, y=None):
        """Return ``get_embed_features_list(X)``; ``y`` is accepted but unused."""
        embedded = get_embed_features_list(X)
        return embedded

In [3]:
# read the train/test CSVs and keep only the three columns selected here
all_train = (
    pd.read_csv('data/all_train.csv', header=0)
    .loc[:, ["date_time", "text", "label"]]
)
all_test = (
    pd.read_csv('data/all_test.csv', header=0)
    .loc[:, ["date_time", "text", "label"]]
)

## Logistic Regression

In [7]:
# chain the embedding step with a logistic regression and train on the text column
vec = FeatureEmbedder()
lr = LogisticRegression()
pipe_lr = make_pipeline(vec, lr)
pipe_lr.fit(all_train["text"], all_train["label"])

Pipeline(steps=[('featureembedder', FeatureEmbedder()),
                ('logisticregression', LogisticRegression())])

In [8]:
# Show the 10 largest logistic-regression coefficients.
# target_names takes class names (as in the show_prediction calls), not the
# per-row training label column that was passed before.
# Features are listed as x<i>: they are positions in the embedded feature vector.
eli5.show_weights(lr, vec=vec, top=10, target_names=[0, 1])

Weight?,Feature
+2.249,x187
+2.245,x200
+2.228,x180
+2.107,x227
+2.029,x178
+2.027,x195
+1.961,x142
… 145 more positive …,… 145 more positive …
… 146 more negative …,… 146 more negative …
-1.941,x15


In [6]:
# explain the logistic-regression pipeline's prediction for one test document
te_lr = TextExplainer(random_state=42)
te_lr.fit(
    doc=all_test["text"].iloc[1],
    predict_proba=pipe_lr.predict_proba,
)
te_lr.show_prediction(target_names=[0, 1])

Contribution?,Feature
0.496,deposit
0.461,users
0.436,can
0.429,resumes
0.338,cancel
0.289,activity
0.285,now
0.264,<BIAS>
0.258,resumed
0.246,following


## SVM

In [22]:
# create pipeline
vec = FeatureEmbedder()
# probability=True fits the probability model with an internal, randomised
# cross-validation; seed it so predict_proba (and the explanation built on it)
# is reproducible across runs. 42 matches the TextExplainer seed.
svm = SVC(probability=True, random_state=42)
pipe_svm = make_pipeline(vec, svm)
pipe_svm.fit(all_train.text, all_train.label)

Pipeline(steps=[('featureembedder', FeatureEmbedder()),
                ('svc', SVC(probability=True))])

In [23]:
# explain the SVM pipeline's prediction for one test document
te_svm = TextExplainer(random_state=42)
te_svm.fit(
    doc=all_test["text"].iloc[1],
    predict_proba=pipe_svm.predict_proba,
)
te_svm.show_prediction(target_names=[0, 1])

Contribution?,Feature
0.634,deposit
0.543,following
0.505,<BIAS>
0.485,resumes
0.357,activity
0.313,now
0.309,cancel
0.273,resumed
0.273,can
0.236,open


## Random Forest

In [24]:
# create pipeline
vec = FeatureEmbedder()
# Random forests are stochastic (bootstrap samples, feature sub-sampling);
# seed the estimator so the fitted model and its explanation are reproducible.
# 42 matches the TextExplainer seed.
rf = RandomForestClassifier(random_state=42)
pipe_rf = make_pipeline(vec, rf)
pipe_rf.fit(all_train.text, all_train.label)

Pipeline(steps=[('featureembedder', FeatureEmbedder()),
                ('randomforestclassifier', RandomForestClassifier())])

In [42]:
# explain the random-forest pipeline's prediction for one test document
# NOTE(review): this explains test row 0, while the LR and SVM cells explain
# row 1 — confirm the different sample is intentional before comparing models.
te_rf = TextExplainer(random_state=42)
te_rf.fit(
    doc=all_test["text"].iloc[0],
    predict_proba=pipe_rf.predict_proba,
)
te_rf.show_prediction(target_names=[0, 1])

Contribution?,Feature
0.408,talent
0.359,for
0.355,digital
0.23,its
0.226,is
0.195,holding
0.182,company
0.176,fidelity
0.135,wing
0.113,based
