# **LIME Explanations**

In [1]:
import numpy as np
import pandas as pd

In [2]:
UNDER_EDUCATED_TEST_FEATURES = "../../01_data/test/under_educated_features.csv"
COLLEGE_EDUCATED_TEST_FEATURES = "../../01_data/test/college_educated_features.csv"
WELL_EDUCATED_TEST_FEATURES = "../../01_data/test/well_educated_features.csv"
WHOLE_DATASET = "../../01_data/census_labeled_indexed.csv"

In [3]:
whole_census = pd.read_csv(WHOLE_DATASET, index_col="index")

In [4]:
under = np.loadtxt(UNDER_EDUCATED_TEST_FEATURES, delimiter=",")
coll = np.loadtxt(COLLEGE_EDUCATED_TEST_FEATURES, delimiter=",")
well = np.loadtxt(WELL_EDUCATED_TEST_FEATURES, delimiter=",")

## **Checking Model Inaccuracy**

In [5]:
MODEL_NAME = "under_educated"
TARGET_SET = "../../01_data/test/well_educated_labels.csv"

In [6]:
import sys
sys.path.append("..")

import neural_network
model = neural_network.create_model(well)

model.load_weights("../../03_models/"+MODEL_NAME+"/")

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f962a9316d8>

In [7]:
predictions = np.argmax(model.predict(well), axis=1)

### Searching for a big mistake:

In [8]:
target = np.loadtxt(TARGET_SET, delimiter=",")
target = np.argmax(target, axis=1)
diffs = np.abs(predictions - target)

#searching for a big mistake:
idx = 0
while (idx<len(diffs)) and (diffs[idx] < 2):
    idx += 1

In [9]:
comp_human_readable = whole_census[whole_census["education-num"] >= 13].iloc[idx]
print("predicted: %d\t actual: %d"%(predictions[idx], target[idx]))
comp_human_readable

predicted: 0	 actual: 2


hours-per-week                     40
age                                32
capital-gain                        0
capital-loss                        0
workclass                     Private
education                   Bachelors
education-num                      13
marital-status     Married-civ-spouse
relationship                  Husband
race                            White
gender                           Male
native-country          United-States
income                          77700
occupation            Exec-managerial
income-category                     2
Name: 38, dtype: object

### Explaining the mistake:

In [12]:
import sklearn
import sklearn.datasets
import sklearn.ensemble

In [13]:
feature_names = ["age", "workclass", "education-num", "marital-status",
                 "occupation", "relationship", "race", "gender",
                 "capital-gain", "capital-loss", "hours-per-week", "native-country"]
label_names = ["income-category"]

categorical_features = ["workclass",
                   "marital-status",
                   "relationship",
                   "race",
                   "native-country",
                   "occupation", "gender"]

In [None]:
data = whole_census[whole_census["education-num"] < 10]

categorical_names = {}
for i,feature in enumerate(categorical_features:
    le = sklearn.preprocessing.LabelEncoder()
    le.fit(data[feature])
    data[feature] = le.transform(data[feature])
    categorical_names[feature] = le.classes_

In [10]:
whole_census[whole_census["education-num"]<10]

Unnamed: 0_level_0,hours-per-week,age,capital-gain,capital-loss,workclass,education,education-num,marital-status,relationship,race,gender,native-country,income,occupation,income-category
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2,10,17,0,0,Private,11th,7,Never-married,Own-child,White,Male,United-States,2600,Other-service,0
3,50,51,0,0,Private,HS-grad,9,Married-civ-spouse,Husband,Asian-Pac-Islander,Male,Cambodia,38997,Sales,1
8,40,45,0,0,Self-emp-not-inc,HS-grad,9,Married-civ-spouse,Husband,White,Male,England,33781,Craft-repair,1
11,42,29,0,0,Private,HS-grad,9,Married-civ-spouse,Husband,White,Male,United-States,30876,Handlers-cleaners,1
12,30,33,0,0,Private,HS-grad,9,Separated,Not-in-family,White,Male,United-States,3866,Machine-op-inspct,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49516,40,25,0,0,Private,HS-grad,9,Divorced,Own-child,Asian-Pac-Islander,Female,Philippines,24457,Handlers-cleaners,1
49517,40,25,0,0,Private,1st-4th,2,Never-married,Not-in-family,Asian-Pac-Islander,Female,Philippines,29000,Other-service,1
49523,40,34,0,0,Private,11th,7,Never-married,Own-child,Asian-Pac-Islander,Male,Philippines,10164,Other-service,0
49524,40,32,0,0,Private,12th,8,Never-married,Own-child,Asian-Pac-Islander,Male,Philippines,18720,Farming-fishing,0


In [11]:
import lime
import lime.lime_tabular

feature_names = ["age", "workclass", "education-num", "marital-status",
                 "occupation", "relationship", "race", "gender",
                 "capital-gain", "capital-loss", "hours-per-week", "native-country"]
label_names = ["income-category"]

categorical_features = ["workclass",
                   "marital-status",
                   "relationship",
                   "race",
                   "native-country",
                   "occupation", "gender"]

train = whole_census[whole_census["education-num"]<10].to_numpy()

explainer = lime.lime_tabular.LimeTabularExplainer(train,
                                                   class_names=label_names,
                                                   feature_names=feature_names,
                                                   categorical_names=categorical_features,
                                                   kernel_width=3)

TypeError: can't multiply sequence by non-int of type 'float'