In [1]:
#!/usr/bin/env python
import wuml


##	We generated a synthetic dataset for regression with 4 dimensions where
##	x1 and x2 have a positive influence
##	x3 has no influence
##	x4 has a negative influence
#
#	The key point of this example is to show that if we use Gaussian-distributed data instead of
#	uniform data, the explanation labels no longer have the correct signs (the magnitudes still make sense).
#	This implies that perhaps we should always map the data to a uniform distribution.

# Load the Gaussian regression example from CSV.
# The 'label' column is used as a continuous target, and preprocessing is
# requested as 'between 0 and 1' (the displayed feature values fall in that range).
# NOTE(review): first_row_is_label=True presumably means the first CSV row
# holds the column names -- confirm against wuml.wData.
data = wuml.wData(
	xpath='../../data/shap_regress_example_gaussian.csv',
	batch_size=20,
	label_type='continuous',
	label_column_name='label',
	first_row_is_label=True,
	preprocess_data='between 0 and 1',
)

# Last expression of the cell: show the loaded frame as the cell output.
data.df.style

2021-10-21 12:02:24.283583: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


Unnamed: 0,A,B,C,D
0,0.851734,0.564529,0.846836,0.95973
1,0.873715,0.226925,0.839486,0.359315
2,0.464868,0.567164,0.555429,0.860818
3,0.636233,0.493199,0.674364,0.530945
4,0.790594,0.409231,0.623484,0.165527
5,0.018829,0.627767,0.816075,0.190346
6,0.943024,0.138056,0.51554,0.347463
7,0.799506,0.808078,0.559854,0.547047
8,0.252696,0.068555,0.36228,0.466701
9,0.730911,0.754123,0.348096,0.310579


In [10]:
# Build the explainer. With loss='mse' this will create a network for
# regression and explain it instance-wise.
# The network is three hidden layers of 400 units with relu, followed by a
# single output unit with no activation.
EXP = wuml.explainer(
	data,
	loss='mse',
	networkStructure=[(400, 'relu'), (400, 'relu'), (400, 'relu'), (1, 'none')],
	max_epoch=400,
	learning_rate=0.001,
	print_network_training_status=False,
)

# Show the regression results: run the explainer's model on the data and
# print the true labels next to the predictions.
Ŷ = EXP.model(data)
SR_train = wuml.summarize_regression_result(data.Y, Ŷ)
print(SR_train.true_vs_predict(print_result=False))


Network Info:
	Learning rate: 0.001
	Max number of epochs: 400
	Cost Function: mse
	Train Loop Callback: None
	Cuda Available: True
	Network Structure
		Linear(in_features=4, out_features=400, bias=True) , relu
		Linear(in_features=400, out_features=400, bias=True) , relu
		Linear(in_features=400, out_features=400, bias=True) , relu
		Linear(in_features=400, out_features=1, bias=True) , none

Avg error: 0.0553

['y' 'ŷ']
[-18.04 -17.94]
[  7.69   7.74]
[-16.01 -15.94]
[  1.14   1.24]
[ 12.33  12.32]
[ -8.95  -8.89]
[  8.02   8.06]
[  8.09   8.11]
[ -5.95  -5.94]
[ 11.07  11.07]
[-28.4  -28.35]
[-10.19 -10.14]
[-11.34 -11.25]
[ -6.92  -6.78]
[  3.29   3.39]
[  4.38   4.4 ]
[ -3.71  -3.65]
[ -5.6   -5.54]
[  7.6    7.63]
[ -4.9   -4.93]
[  1.58   1.61]
[ 13.54  13.63]
[-18.2  -18.12]
[ -4.42  -4.38]
[ -1.66  -1.56]
[ -4.1   -4.04]
[ -1.35  -1.32]
[  5.06   5.12]
[-11.76 -11.7 ]
[ -4.65  -4.67]



In [11]:
# Show the explanation results.
# Calling the explainer on the data yields one row per sample and one column
# per input feature (columns 0-3 in the printed table).
explanation = EXP(data)	# outputs the weight importance of each feature for each sample
print(explanation)

  0%|          | 0/30 [00:00<?, ?it/s]

            0         1         2          3
0   14.732901  1.622899 -3.096718 -29.911076
1   16.537953  1.135006 -2.901353  -5.736407
2    9.713469  1.691893 -0.918515 -25.133383
3   13.458563  1.606165 -1.532466 -10.997820
4   15.290712  1.524979 -1.424396  -1.780772
5    0.598057 -0.630977 -4.050852  -3.514857
6   15.263202  0.606546 -1.372633  -5.142946
7   17.161007  3.041082 -0.487936 -10.310365
8    5.430164  0.227658 -0.578422  -9.728987
9   15.672961  1.657520 -0.345617  -4.627913
10   3.739357  0.270274 -0.039342 -31.028291
11   7.684911  0.852605 -0.091021 -17.293673
12   2.052480  0.210874 -0.033790 -12.193646
13   7.518036  0.558108 -0.962396 -12.598482
14  10.672254  0.204042 -0.637186  -5.563341
15   6.951727 -0.037884 -0.533622  -0.693410
16  10.375360  0.601219 -0.067943 -13.269692
17   6.771822  0.766178 -2.506760  -9.277646
18  13.339538  0.698924 -2.182242  -2.940124
19   5.641644  0.087325 -0.567684  -8.802877
20   5.502896  0.532086 -2.062118  -1.068696
21  18.788