In [None]:
# Extract the bundled MARLENA archive so the `Marlena` package imported in the next cell is available.
!unzip Marlena.zip

In [None]:
from XMTR import MTR
from Marlena.algorithms.MARLENA.marlena.marlena.marlena import MARLENA
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split, KFold
import warnings
warnings.simplefilter(action='ignore')
import time
import csv

In [None]:
def calc_al_error(instance, perc):
  """Return the allowed error for a target value: (|instance| + 0.1) * perc.

  instance: target value (scalar or array) whose magnitude scales the error.
  perc: fraction of that magnitude that is tolerated.

  The constant 0.1 offset keeps the result non-zero when `instance` is 0
  (as long as `perc` itself is non-zero).
  """
  magnitude = abs(instance) + 0.1
  return magnitude * perc

# Load the Facebook post-metrics dataset (semicolon-separated CSV).
# Tip: add nrows=200 to read_csv for a quick run on a subset.
fb_df = pd.read_csv('dataset_Facebook.csv', sep=';')

# Fill NaN in 'like', 'share' and 'Paid' with 0.
# This is one frame-level fillna that returns a new DataFrame. The previous
# `fb_df[col].fillna(0, inplace=True)` form is chained assignment with an
# inplace method: pandas deprecates it and, under Copy-on-Write, it leaves
# `fb_df` unchanged (the warning was hidden by warnings.simplefilter('ignore')).
fb_df = fb_df.fillna({'like': 0, 'share': 0, 'Paid': 0})

# Drop the 'Type' column (presumably because it is non-numeric -- confirm against the CSV).
fb_df = fb_df.drop(columns=['Type'])


# get column names
column_names = fb_df.columns

# get data/target names: the first 14 columns are the model inputs,
# every remaining column is a regression target
f_n = column_names[:14]
t_n = column_names[14:]

# feature name -> column position in X (used later to edit single features of an instance).
# Built with enumerate so no stray `index`/`feature` counters are left in the
# notebook namespace (`index` is reused by the instance-alteration cells).
index_of_features = {feature: position for position, feature in enumerate(f_n)}

X = fb_df[f_n]
y = fb_df[t_n]

# Work on plain numpy arrays from here on.
X, y = X.to_numpy(), y.to_numpy()

# Rescale every target column into the range [1, 10]; `scaler` is kept so that
# predictions can be mapped back to the original units with inverse_transform.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so the held-out targets influence the scaling -- confirm this is intended.
scaler = MinMaxScaler(feature_range=(1, 10))
y = scaler.fit_transform(y)

# Hold out 10% of the rows as the test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

# MTR
# Random forest regressor (fitted on all targets at once) that XMTR will explain.
# Earlier hyper-parameter configuration, kept for reference:
#RFmodel = RandomForestRegressor(n_estimators=500, max_depth=3, min_samples_leaf=5, max_features=None, n_jobs=-1, random_state=42)
RFmodel = RandomForestRegressor(n_estimators=500, max_depth=2, min_samples_leaf=5, max_features=0.75, n_jobs=-1, random_state=42)
RFmodel.fit(X_train, y_train)
# Wrap the fitted forest together with the data splits and the feature/target names.
MTR_obj = MTR(model=RFmodel, X_train = X_train, X_test=X_test, y_train=y_train, y_test=y_test, feature_names=f_n, target_names=t_n)
# Model handle exposed by MTR; this is the object passed to MARLENA in later cells.
# NOTE(review): whether getModel() returns RFmodel itself or a modified/refitted copy is not visible here.
model = MTR_obj.getModel()

# ALTER FEATURES THAT ARE ONLY IN XMTR RULE

In [None]:
# Explain one test instance with both XMTR and MARLENA.
random_instance = 0 # index (into the test split) of the examined instance
percentage = 0.1 # percentage of allowed error

instance = X_test[random_instance]
# allowed error derived from the (scaled) true targets of this instance
error = calc_al_error(y_test[random_instance], percentage)

# MTR RULE
MTRrule = MTR_obj.explain(instance, error)
# The two getters are read right after explain(); presumably they report the
# state of the most recent explain() call -- confirm against the XMTR source.
feature_limits = MTR_obj.getFeatureLimits() # dict: feature name -> [min, max] range used in the rule
decisionsAndErrors = MTR_obj.getDecisionsAndErros() # indexed as [:, 0] later to obtain the predictions

# MARLENA RULE
marlena = MARLENA(neigh_type='mixed', random_state=42)
i2e = pd.Series(instance, index=f_n) # instance to explain, labelled with the feature names
X2E = pd.DataFrame(X_train, columns=f_n) # training data, labelled with the feature names
# returns rule, mask(MarlenaPrediction), list_split_conditions, len_rule, instance_important_feat, fidelity, hit, DT
MARrule, MarlenaPrediction, list_split_conditions, len_rule, _, _, _, _ = marlena.extract_explanation(i2e, X2E, model, f_n, [],
                                          t_n, k=10, size=50, alpha=0.7)

In [None]:
# THE ACTUAL VALUES OF EACH FEATURE
# Print "<feature name> :  <value>" for every feature of the examined instance.
for i, feature_value in enumerate(instance):
  print(f_n[i], ": ", feature_value)

Page total likes :  116091.0
Category :  3.0
Post Month :  5.0
Post Weekday :  6.0
Post Hour :  3.0
Paid :  0.0
Lifetime Post Total Reach :  3332.0
Lifetime Post Total Impressions :  5797.0
Lifetime Engaged Users :  463.0
Lifetime Post Consumers :  406.0
Lifetime Post Consumptions :  552.0
Lifetime Post Impressions by people who have liked your Page :  4347.0
Lifetime Post reach by people who like your Page :  2330.0
Lifetime People who have liked your Page and engaged with your post :  341.0


In [None]:
# THE ACTUAL VALUES OF EACH TARGET
# Undo the MinMax scaling once (it does not depend on the loop index), then
# print "<target name> :  <value>" for every target in original units.
actual_targets = scaler.inverse_transform(np.array([y_test[random_instance]]).reshape(1, -1))[0]
for i, target_value in enumerate(actual_targets):
  print(t_n[i], ": ", target_value)

comment :  0.0
like :  87.00000000000007
share :  18.0
Total Interactions :  105.00000000000001


**Final rules** (predictions are not inverse transformed here)

In [None]:
# XMTR rule for the original instance (predictions are in the scaled target space)
MTRrule

'if 236.0<=Lifetime Post reach by people who like your Page<=9150.0 & 341.0<=Lifetime Engaged Users<=597.0 & 9.0<=Lifetime People who have liked your Page and engaged with your post<=378.0 & 567.0<=Lifetime Post Impressions by people who have liked your Page<=19668.5 & 570.0<=Lifetime Post Total Impressions<=6122.5 & 2333.5<=Lifetime Post Total Reach<=3346.0 & 9.0<=Lifetime Post Consumers<=2244.5 then comment: 1.0935 +/- 0.0 error, like: 1.1705 +/- 0.0 error, share: 1.2056 +/- 0.0 error, Total Interactions: 1.1704 +/- 0.0 error'

In [None]:
# MARLENA rule for the original instance, followed by its per-feature split conditions
MARrule, list_split_conditions

(['{Lifetime People who have liked your Page and engaged with your post,\n Lifetime Post Total Reach,\n Lifetime Post Total Impressions,\n Lifetime Engaged Users,\n Lifetime Post Consumptions} -> comment:1.093512138046181 like:1.170537774906238 share:1.2056097090262126 Total Interactions:1.1703883102776997 '],
 {'Lifetime People who have liked your Page and engaged with your post': [['<=',
    476.87],
   ['<=', 400.5]],
  'Lifetime Post Total Reach': [['<=', 3851.15], ['<=', 3373.97]],
  'Lifetime Post Total Impressions': [['<=', 6399.78], ['<=', 6146.1]],
  'Lifetime Engaged Users': [['<=', 589.01]],
  'Lifetime Post Consumptions': [['>', 542.0]]})

Features present in each rule

In [None]:
# Features mentioned by each rule (the keys of the per-rule range/condition dicts).
XMTR_ft = set(feature_limits.keys()) # features present in the XMTR rule
MARLENA_ft = set(list_split_conditions.keys()) # features present in the MARLENA rule

ft_both_rules = XMTR_ft & MARLENA_ft # features that are present in both rules
ft_extra_on_MTR = XMTR_ft - MARLENA_ft # features that are present in XMTR but not in MARLENA
ft_extra_on_MARLENA = MARLENA_ft - XMTR_ft # features that are present in MARLENA but not in XMTR

# The label below previously read " MARLENAl" (typo for " MARLENA:").
print("XMTR:", len(XMTR_ft), " MARLENA:", len(MARLENA_ft)) # rule lengths
print("both:", len(ft_both_rules), ft_both_rules) # features present in both rules
print("extra_on_XMTR:", len(ft_extra_on_MTR), ft_extra_on_MTR) # features present in XMTR and not in MARLENA
print("extra_on_MARLENA:", len(ft_extra_on_MARLENA), ft_extra_on_MARLENA) # features present in MARLENA and not in XMTR

XMTR: 7  MARLENAl 5
both: 4 {'Lifetime Engaged Users', 'Lifetime People who have liked your Page and engaged with your post', 'Lifetime Post Total Impressions', 'Lifetime Post Total Reach'}
extra_on_XMTR: 3 {'Lifetime Post Impressions by people who have liked your Page', 'Lifetime Post Consumers', 'Lifetime Post reach by people who like your Page'}
extra_on_MARLENA: 1 {'Lifetime Post Consumptions'}


**Feature ranges**

In [None]:
# Feature ranges used by the XMTR rule: one "[min, max]" pair per feature.
print("XMTR")
for key, limits in feature_limits.items():
  print("   ", key, limits)

print("="*100)
# Split conditions used by the MARLENA rule: a list of [operator, threshold] per feature.
print("MARLENA")
for key, conditions in list_split_conditions.items():
  print("   ", key, conditions)

XMTR
    Lifetime Post reach by people who like your Page [236.0, 9150.0]
    Lifetime Engaged Users [341.0, 597.0]
    Lifetime People who have liked your Page and engaged with your post [9.0, 378.0]
    Lifetime Post Impressions by people who have liked your Page [567.0, 19668.5]
    Lifetime Post Total Impressions [570.0, 6122.5]
    Lifetime Post Total Reach [2333.5, 3346.0]
    Lifetime Post Consumers [9.0, 2244.5]
MARLENA
    Lifetime People who have liked your Page and engaged with your post [['<=', 476.87], ['<=', 400.5]]
    Lifetime Post Total Reach [['<=', 3851.15], ['<=', 3373.97]]
    Lifetime Post Total Impressions [['<=', 6399.78], ['<=', 6146.1]]
    Lifetime Engaged Users [['<=', 589.01]]
    Lifetime Post Consumptions [['>', 542.0]]


**Target values and Predictions** (with inverse transformation)

In [None]:
# XMTR predictions: first column of decisionsAndErrors, as a single row.
MTRpred = np.array([np.array(decisionsAndErrors)[:,0]])

# Print the actual targets, the XMTR predictions and the MARLENA predictions,
# each mapped back from the scaled space to the original units.
for label, scaled in (
    ("Actual values:", np.array([y_test[random_instance]])),
    ("   XMTR preds:", MTRpred),
    ("MARLENA preds:", MarlenaPrediction),
):
  print(label, scaler.inverse_transform(scaled.reshape(1, -1)))

Actual values [[  0.  87.  18. 105.]]
   XMTR preds [[  3.86516837  98.00237465  18.04796335 119.91550637]]
MARLENA preds [[  3.86516837  98.00237465  18.04796335 119.91550637]]


Change features of the examined instance

In [None]:
# Start from a copy so the examined instance itself stays untouched.
instance_altered = instance.copy()

# Every feature that appears only in the XMTR rule is pushed to whichever end
# of its XMTR range lies further away from the instance's current value.
for f in ft_extra_on_MTR:
  index = index_of_features[f] # column position of the feature
  rng = feature_limits[f] # [min, max] range given by XMTR

  from_min = abs(instance[index] - rng[0])
  from_max = abs(instance[index] - rng[1])
  # lower bound when it is strictly further away, otherwise the upper bound
  value = rng[0] if from_min > from_max else rng[1]

  print("feature:", f, " from ", instance[index], " to ", value)
  instance_altered[index] = value

feature: Lifetime Post Impressions by people who have liked your Page  from  4347.0  to  19668.5
feature: Lifetime Post Consumers  from  406.0  to  2244.5
feature: Lifetime Post reach by people who like your Page  from  2330.0  to  9150.0


CREATE EXPLANATIONS FOR THE ALTERED INSTANCE

In [None]:
# XMTR explanation of the altered instance.
# `error` is reused as computed for the original instance's targets.
MTRrule_new = MTR_obj.explain(instance_altered, error)
# Read right after explain(); presumably these reflect the most recent explain() call -- confirm.
feature_limits_new = MTR_obj.getFeatureLimits()
decisionsAndErrors_new = MTR_obj.getDecisionsAndErros()

# MARLENA explanation of the altered instance (fresh explainer, same settings and seed as before)
marlena_new = MARLENA(neigh_type='mixed', random_state=42)
i2e_new = pd.Series(instance_altered, index=f_n)
X2E_new = pd.DataFrame(X_train, columns=f_n)
# returns rule, mask(MarlenaPrediction), list_split_conditions, len_rule, instance_important_feat, fidelity, hit, DT
MARrule_new, MarlenaPrediction_new, list_split_conditions_new, len_rule_new, _, _, _, _ = marlena_new.extract_explanation(i2e_new, X2E_new, model, f_n, [],
                                          t_n, k=10, size=50, alpha=0.7)


In [None]:
# XMTR rule for the instance whose XMTR-only features were altered
MTRrule_new

'if 236.0<=Lifetime Post reach by people who like your Page<=9150.0 & 341.0<=Lifetime Engaged Users<=597.0 & 9.0<=Lifetime People who have liked your Page and engaged with your post<=378.0 & 567.0<=Lifetime Post Impressions by people who have liked your Page<=19668.5 & 570.0<=Lifetime Post Total Impressions<=6122.5 & 2333.5<=Lifetime Post Total Reach<=3346.0 & 9.0<=Lifetime Post Consumers<=2244.5 then comment: 1.0935 +/- 0.0 error, like: 1.1705 +/- 0.0 error, share: 1.2056 +/- 0.0 error, Total Interactions: 1.1704 +/- 0.0 error'

In [None]:
# MARLENA rule and split conditions for the instance whose XMTR-only features were altered
MARrule_new, list_split_conditions_new

(['{Lifetime People who have liked your Page and engaged with your post,\n Lifetime Post reach by people who like your Page,\n Lifetime Post Consumers,\n Lifetime Post Total Reach,\n Lifetime Post Total Impressions} -> comment:1.1026136795848607 like:1.183403860318009 share:1.218915747642533 Total Interactions:1.1830881741408132 '],
 {'Lifetime People who have liked your Page and engaged with your post': [['<=',
    498.22],
   ['<=', 473.39],
   ['<=', 418.5]],
  'Lifetime Post reach by people who like your Page': [['>', 2753.09],
   ['<=', 10720.45]],
  'Lifetime Post Consumers': [['>', 592.56]],
  'Lifetime Post Total Reach': [['<=', 5463.81]],
  'Lifetime Post Total Impressions': [['<=', 12716.8]]})

In [None]:
# see if XMTR preds stayed the same
# Values are rounded to 14 decimals so floating-point noise does not count as a change.
# The rounded copies get their own names instead of overwriting
# decisionsAndErrors / decisionsAndErrors_new: overwriting made later cells
# depend on whether (and in which order) this cell had been executed.
decisions_rounded = np.round(np.array(decisionsAndErrors), decimals = 14)
decisions_new_rounded = np.round(np.array(decisionsAndErrors_new), decimals = 14)
print("are XMTR preds same? ->", np.array_equal(decisions_rounded, decisions_new_rounded))

are XMTR preds same? -> True


In [None]:
# see the difference in MARLENA preds
# Mean of the absolute per-target differences. Taking abs() of the mean instead
# would let increases and decreases on different targets cancel each other out.
print("MARLENA dif", abs(MarlenaPrediction - MarlenaPrediction_new).mean())

MARLENA dif 0.01199338235747116


In [None]:
# Map every scaled quantity back to the original target units first ...
pred_before = scaler.inverse_transform(MarlenaPrediction.reshape(1, -1))
pred_after = scaler.inverse_transform(MarlenaPrediction_new.reshape(1, -1))

# XMTR predictions are the first column of decisionsAndErrors*
MTRpred = np.array([np.array(decisionsAndErrors)[:,0]])
MTRpred_original = scaler.inverse_transform(MTRpred.reshape(1, -1))
MTRpred = np.array([np.array(decisionsAndErrors_new)[:,0]])
MTRpred_after = scaler.inverse_transform(MTRpred.reshape(1, -1))

y_original = scaler.inverse_transform(np.array([y_test[random_instance]]).reshape(1, -1))

# ... then report them: MARLENA before/after, XMTR before/after, true targets.
print("MARLENA preds")
print(pred_before)
print(pred_after)

print("=============")
print("XMTR preds")
print(MTRpred_original)
print(MTRpred_after)

print("=============")
print("original")
print(y_original)

MARLENA preds
[[  3.86516837  98.00237465  18.04796335 119.91550637]]
[[  4.24136542 105.39608506  19.21593785 128.85338833]]
XMTR preds
[[  3.86516837  98.00237465  18.04796335 119.91550637]]
[[  3.86516837  98.00237465  18.04796335 119.91550637]]
original
[[  0.  87.  18. 105.]]


# ALTER FEATURES THAT ARE ONLY IN MARLENA RULE

In [101]:
# for the features that are only in the MARLENA rule, multiply them by 10^10
# (the following cells re-explain this altered copy and compare the predictions)
instance_altered2 = instance.copy() # copy, so the examined instance stays untouched
for f in ft_extra_on_MARLENA:
  index = index_of_features[f] # get the index of that feature
  instance_altered2[index] = instance[index]*(10**10)
  print("feature:", f, " from ", instance[index], " to ", instance_altered2[index])

feature: Lifetime Post Consumptions  from  552.0  to  5520000000000.0


CREATE EXPLANATIONS FOR THE ALTERED INSTANCE

In [107]:
# XMTR explanation of the instance whose MARLENA-only features were altered.
# `error` is reused as computed for the original instance's targets.
MTRrule_new2 = MTR_obj.explain(instance_altered2, error)
# Read right after explain(); presumably these reflect the most recent explain() call -- confirm.
feature_limits_new2 = MTR_obj.getFeatureLimits()
decisionsAndErrors_new2 = MTR_obj.getDecisionsAndErros()

# MARLENA explanation of the same altered instance (fresh explainer, same settings and seed as before)
marlena_new2 = MARLENA(neigh_type='mixed', random_state=42)
i2e_new2 = pd.Series(instance_altered2, index=f_n)
X2E_new2 = pd.DataFrame(X_train, columns=f_n)
# returns rule, mask(MarlenaPrediction), list_split_conditions, len_rule, instance_important_feat, fidelity, hit, DT
MARrule_new2, MarlenaPrediction_new2, list_split_conditions_new2, len_rule_new2, _, _, _, _ = marlena_new2.extract_explanation(i2e_new2, X2E_new2, model, f_n, [],
                                          t_n, k=10, size=50, alpha=0.7)


In [103]:
# XMTR rule for the instance whose MARLENA-only features were altered
MTRrule_new2

'if 236.0<=Lifetime Post reach by people who like your Page<=9150.0 & 341.0<=Lifetime Engaged Users<=597.0 & 9.0<=Lifetime People who have liked your Page and engaged with your post<=378.0 & 567.0<=Lifetime Post Impressions by people who have liked your Page<=19668.5 & 570.0<=Lifetime Post Total Impressions<=6122.5 & 2333.5<=Lifetime Post Total Reach<=3346.0 & 9.0<=Lifetime Post Consumers<=2244.5 then comment: 1.0935 +/- 0.0 error, like: 1.1705 +/- 0.0 error, share: 1.2056 +/- 0.0 error, Total Interactions: 1.1704 +/- 0.0 error'

In [104]:
# MARLENA rule and split conditions for the instance whose MARLENA-only features were altered
MARrule_new2, list_split_conditions_new2

(['{Lifetime People who have liked your Page and engaged with your post,\n Lifetime Post Total Impressions,\n Lifetime Post Total Reach,\n Lifetime Engaged Users,\n Post Hour} -> comment:1.093512138046181 like:1.170537774906238 share:1.2056097090262126 Total Interactions:1.1703883102776997 '],
 {'Lifetime People who have liked your Page and engaged with your post': [['<=',
    488.66],
   ['<=', 445.96]],
  'Lifetime Post Total Impressions': [['<=', 6615.25]],
  'Lifetime Post Total Reach': [['<=', 4315.48]],
  'Lifetime Engaged Users': [['<=', 639.88]],
  'Post Hour': [['<=', 15.5]]})

In [105]:
# see if XMTR preds stayed the same
# Values are rounded to 14 decimals so floating-point noise does not count as a change.
# The rounded copies get their own names instead of overwriting
# decisionsAndErrors / decisionsAndErrors_new2: overwriting made later cells
# mix rounded and unrounded arrays when cells were re-run out of order.
decisions_rounded = np.round(np.array(decisionsAndErrors), decimals = 14)
decisions_new2_rounded = np.round(np.array(decisionsAndErrors_new2), decimals = 14)
print("are XMTR preds same? ->", np.array_equal(decisions_rounded, decisions_new2_rounded))

are XMTR preds same? -> True


In [108]:
# see the difference in MARLENA preds
# Mean of the absolute per-target differences. Taking abs() of the mean instead
# would let increases and decreases on different targets cancel each other out.
print("MARLENA dif", abs(MarlenaPrediction - MarlenaPrediction_new2).mean())

MARLENA dif 5.551115123125783e-17


In [109]:
# Show each MARLENA prediction before -> after the alteration, target by target.
for i, altered_pred in enumerate(MarlenaPrediction_new2):
  print(MarlenaPrediction[i], "->", altered_pred)

1.093512138046181 -> 1.093512138046181
1.170537774906238 -> 1.170537774906238
1.2056097090262126 -> 1.2056097090262123
1.1703883102776997 -> 1.1703883102776997


In [115]:
# Show 16 significant decimals so differences at float-noise level become visible.
np.set_printoptions(precision=16)

# Map every scaled quantity back to the original target units first ...
pred_before = scaler.inverse_transform(MarlenaPrediction.reshape(1, -1))
pred_after = scaler.inverse_transform(MarlenaPrediction_new2.reshape(1, -1))

# XMTR predictions are the first column of decisionsAndErrors*
MTRpred = np.array([np.array(decisionsAndErrors)[:,0]])
MTRpred_original = scaler.inverse_transform(MTRpred.reshape(1, -1))
MTRpred = np.array([np.array(decisionsAndErrors_new2)[:,0]])
MTRpred_after = scaler.inverse_transform(MTRpred.reshape(1, -1))

y_original = scaler.inverse_transform(np.array([y_test[random_instance]]).reshape(1, -1))

# ... then report them: MARLENA before/after, XMTR before/after, true targets.
print("MARLENA preds")
print(pred_before)
print(pred_after)

print("=============")
print("XMTR preds")
print(MTRpred_original)
print(MTRpred_after)

print("=============")
print("original")
print(y_original)

MARLENA preds
[[  3.8651683725754804  98.00237464611811    18.047963347856435
  119.91550636655002  ]]
[[  3.8651683725754804  98.00237464611811    18.047963347856417
  119.91550636655002  ]]
XMTR preds
[[  3.8651683725754435  98.00237464611926    18.047963347856204
  119.91550636655018  ]]
[[  3.8651683725754893  98.00237464611811    18.047963347856417
  119.91550636655002  ]]
original
[[  0.                87.00000000000007  18.
  105.00000000000001]]
