# Many to One Explainer - Demo of Conjunctive vs Disjunctive Explanations

This demo compares conjunctive and disjunctive explanations on several datasets, measuring both the quality metrics (coverage and separation error) and the runtime of explanation generation.

In [35]:
import pandas as pd
import pd_explain
import time
import numpy as np
from sklearn.cluster import KMeans
# Value passed as `separation_threshold` to the Adults-dataset `explain` calls below.
separation_error = 0.8

## Adults dataset

In [36]:
# Load the Adults dataset from the repository's Examples folder.
# The path uses forward slashes so it resolves on Windows, Linux and macOS alike
# (a backslash-separated path is only valid on Windows).
adults = pd.read_csv("../Examples/Datasets/adult.csv")
adults

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,label
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,39,Private,215419,Bachelors,13,Divorced,Prof-specialty,Not-in-family,White,Female,0,0,36,United-States,<=50K
48838,64,?,321403,HS-grad,9,Widowed,?,Other-relative,Black,Male,0,0,40,United-States,<=50K
48839,38,Private,374983,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,50,United-States,<=50K
48840,44,Private,83891,Bachelors,13,Divorced,Adm-clerical,Own-child,Asian-Pac-Islander,Male,5455,0,40,United-States,<=50K


In [37]:
# Empty results table for the Adults experiments. Rows are keyed by a two-level
# index (test name, explanation type) and are appended by the test cells below.
adults_comparison_df = pd.DataFrame(
    index=pd.MultiIndex(
        names=['Test', 'Explanation Type'],
        codes=[[], []],
        levels=[[], []],
    ),
    columns=['Average Coverage', 'Average Separation Error', 'Runtime'],
)

### Full dataset test

In [38]:
# Time the conjunctive ('conj') 'many to one' explanation of the `label` column
# over the full Adults frame.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
conj_res = adults.explain(
    explainer='many to one',
    labels='label',
    explanation_form='conj',
    separation_threshold=separation_error,
)
conj_time = time.perf_counter() - start_time
print(f"Conjunctive explanation runtime: {conj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Full dataset', 'conjunctive'), :] = [
    np.mean(conj_res['Coverage']),
    np.mean(conj_res['Separation Error']),
    conj_time,
]
conj_res

Conjunctive explanation runtime: 1.2492327690124512


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
<=50K,0 <= capital-gain <= 4243.5,0.99,0.2
<=50K,0.0 <= capital-gain <= 19999.8,1.0,0.23
<=50K,0 <= capital-gain <= 4668.5,0.99,0.2
<=50K,0 <= capital-gain <= 4668.5 AND 1 <= education-num <= 10,0.75,0.13
<=50K,0.0 <= capital-gain <= 24999.75,1.0,0.23
<=50K,0.0 <= capital-gain <= 16666.5,1.0,0.23
<=50K,1 <= education-num <= 10,0.76,0.15
<=50K,0 <= capital-gain <= 4243.5 AND 1 <= education-num <= 10,0.75,0.13
>50K,8.5 <= education-num <= 16.0,0.97,0.73
>50K,marital-status == Married-civ-spouse,0.85,0.55


In [39]:
# Time the disjunctive ('disj') 'many to one' explanation of the `label` column
# over the full Adults frame.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
disj_res = adults.explain(
    explainer='many to one',
    labels='label',
    explanation_form='disj',
    separation_threshold=separation_error,
)
disj_time = time.perf_counter() - start_time
print(f"Disjunctive explanation runtime: {disj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Full dataset', 'disjunctive'), :] = [
    np.mean(disj_res['Coverage']),
    np.mean(disj_res['Separation Error']),
    disj_time,
]
disj_res

Disjunctive explanation runtime: 1.691772699356079


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
<=50K,(0 <= capital-gain <= 4668.5),0.99,0.2
<=50K,(0.0 <= capital-gain <= 19999.8),1.0,0.23
<=50K,(6.0 <= education-num <= 11.0) OR (0 <= capital-gain <= 4668.5),1.0,0.22
<=50K,(0 <= capital-gain <= 4243.5),0.99,0.2
<=50K,(6.0 <= education-num <= 11.0) OR (0 <= capital-gain <= 4243.5),1.0,0.22
<=50K,(0.0 <= capital-gain <= 24999.75),1.0,0.23
<=50K,(1 <= education-num <= 10) OR (0 <= capital-gain <= 4243.5),1.0,0.22
<=50K,(0.0 <= capital-gain <= 16666.5),1.0,0.23
<=50K,(1 <= education-num <= 10) OR (0 <= capital-gain <= 4668.5),1.0,0.22
<=50K,(1 <= education-num <= 10),0.76,0.15


### Numeric only and categorical only tests

In [40]:
# Split the Adults frame into attribute subsets for the next tests; the label
# column is kept separately and passed to `explain` as its own series.
labels = adults['label']
numeric_only = adults[[
    'age',
    'fnlwgt',
    'education-num',
    'capital-gain',
    'capital-loss',
    'hours-per-week',
]]
categorical_only = adults[[
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
]]

In [41]:
# Time the conjunctive ('conj') 'many to one' explanation using only the
# `numeric_only` columns, with the labels supplied as a separate series.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
conj_res = numeric_only.explain(
    explainer='many to one',
    labels=labels,
    explanation_form='conj',
    separation_threshold=separation_error,
)
conj_time = time.perf_counter() - start_time
print(f"Conjunctive explanation runtime: {conj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Numeric only', 'conjunctive'), :] = [
    np.mean(conj_res['Coverage']),
    np.mean(conj_res['Separation Error']),
    conj_time,
]
conj_res

Conjunctive explanation runtime: 1.1632187366485596


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
<=50K,0 <= capital-gain <= 4243.5,0.99,0.2
<=50K,0.0 <= capital-gain <= 19999.8,1.0,0.23
<=50K,0 <= capital-gain <= 4668.5,0.99,0.2
<=50K,0 <= capital-gain <= 4668.5 AND 1 <= education-num <= 10,0.75,0.13
<=50K,0.0 <= capital-gain <= 24999.75,1.0,0.23
<=50K,0.0 <= capital-gain <= 16666.5,1.0,0.23
<=50K,1 <= education-num <= 10,0.76,0.15
<=50K,0 <= capital-gain <= 4243.5 AND 1 <= education-num <= 10,0.75,0.13
>50K,8.5 <= education-num <= 16.0,0.97,0.73
>50K,8.5 <= education-num <= 16.0 AND 29.5 <= age <= 90,0.91,0.65


In [42]:
# Time the disjunctive ('disj') 'many to one' explanation using only the
# `numeric_only` columns, with the labels supplied as a separate series.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
disj_res = numeric_only.explain(
    explainer='many to one',
    labels=labels,
    explanation_form='disj',
    separation_threshold=separation_error,
)
disj_time = time.perf_counter() - start_time
print(f"Disjunctive explanation runtime: {disj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Numeric only', 'disjunctive'), :] = [
    np.mean(disj_res['Coverage']),
    np.mean(disj_res['Separation Error']),
    disj_time,
]
disj_res

Disjunctive explanation runtime: 2.143446922302246


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
<=50K,(0 <= capital-gain <= 4668.5),0.99,0.2
<=50K,(0.0 <= capital-gain <= 19999.8),1.0,0.23
<=50K,(6.0 <= education-num <= 11.0) OR (0 <= capital-gain <= 4668.5),1.0,0.22
<=50K,(0 <= capital-gain <= 4243.5),0.99,0.2
<=50K,(6.0 <= education-num <= 11.0) OR (0 <= capital-gain <= 4243.5),1.0,0.22
<=50K,(0.0 <= capital-gain <= 24999.75),1.0,0.23
<=50K,(1 <= education-num <= 10) OR (0 <= capital-gain <= 4243.5),1.0,0.22
<=50K,(0.0 <= capital-gain <= 16666.5),1.0,0.23
<=50K,(1 <= education-num <= 10) OR (0 <= capital-gain <= 4668.5),1.0,0.22
<=50K,(1 <= education-num <= 10),0.76,0.15


In [43]:
# Time the conjunctive ('conj') 'many to one' explanation using only the
# `categorical_only` columns, with the labels supplied as a separate series.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
conj_res = categorical_only.explain(
    explainer='many to one',
    labels=labels,
    explanation_form='conj',
    separation_threshold=separation_error,
)
conj_time = time.perf_counter() - start_time
print(f"Conjunctive explanation runtime: {conj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Categorical only', 'conjunctive'), :] = [
    np.mean(conj_res['Coverage']),
    np.mean(conj_res['Separation Error']),
    conj_time,
]
conj_res

Conjunctive explanation runtime: 0.47734808921813965


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
<=50K,occupation != Prof-specialty AND education != Bachelors,0.82,0.18
<=50K,occupation != Prof-specialty,0.91,0.21
>50K,marital-status == Married-civ-spouse,0.85,0.55


In [44]:
# Time the disjunctive ('disj') 'many to one' explanation using only the
# `categorical_only` columns, with the labels supplied as a separate series.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
disj_res = categorical_only.explain(
    explainer='many to one',
    labels=labels,
    explanation_form='disj',
    separation_threshold=separation_error,
)
disj_time = time.perf_counter() - start_time
print(f"Disjunctive explanation runtime: {disj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Categorical only', 'disjunctive'), :] = [
    np.mean(disj_res['Coverage']),
    np.mean(disj_res['Separation Error']),
    disj_time,
]
disj_res

Disjunctive explanation runtime: 0.6061787605285645


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
<=50K,occupation != Prof-specialty OR education != Bachelors,0.96,0.23
<=50K,occupation != Prof-specialty,0.91,0.21
>50K,marital-status == Married-civ-spouse,0.85,0.55


### Groupby tests

In [45]:
# Build the two grouped aggregates explained in the groupby tests below:
# one grouped by a single column, one grouped by a pair of columns.
# NOTE(review): on stock pandas >= 2.0, GroupBy.mean() raises a TypeError when the
# frame contains non-numeric columns unless numeric_only=True is passed — confirm
# how pd_explain's groupby handles this before upgrading pandas.
single_attribute_groupby = adults.groupby('relationship').mean()
multi_attribute_groupby = adults.groupby(['sex', 'label']).mean()

In [46]:
# Time the conjunctive ('conj') 'many to one' explanation of the single-column
# groupby result; no `labels` argument is passed for groupby results.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
conj_res = single_attribute_groupby.explain(
    explainer='many to one',
    explanation_form='conj',
    separation_threshold=separation_error,
)
conj_time = time.perf_counter() - start_time
print(f"Conjunctive explanation runtime: {conj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Single attribute groupby', 'conjunctive'), :] = [
    np.mean(conj_res['Coverage']),
    np.mean(conj_res['Separation Error']),
    conj_time,
]
conj_res

Conjunctive explanation runtime: 2.243629217147827


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
Not-in-family,marital-status != Married-civ-spouse,1.0,0.53
Not-in-family,marital-status != Married-civ-spouse AND 25.5 <= age <= 90,0.81,0.43
Not-in-family,marital-status != Married-civ-spouse AND 19.5 <= age <= 90,0.98,0.49
Not-in-family,marital-status != Married-civ-spouse AND 22.5 <= age <= 90,0.91,0.45
Husband,marital-status == Married-civ-spouse AND sex != Female,1.0,0.01
Husband,marital-status == Married-civ-spouse AND sex == Male,1.0,0.01
Husband,marital-status == Married-civ-spouse,1.0,0.12
Wife,marital-status == Married-civ-spouse AND sex == Female,0.99,0.07
Wife,marital-status == Married-civ-spouse AND sex != Male,0.99,0.07
Own-child,marital-status != Married-civ-spouse AND 17.0 <= age <= 29.166666666666664,0.79,0.49


In [47]:
# Time the disjunctive ('disj') 'many to one' explanation of the single-column
# groupby result; no `labels` argument is passed for groupby results.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
disj_res = single_attribute_groupby.explain(
    explainer='many to one',
    explanation_form='disj',
    separation_threshold=separation_error,
)
disj_time = time.perf_counter() - start_time
print(f"Disjunctive explanation runtime: {disj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Single attribute groupby', 'disjunctive'), :] = [
    np.mean(disj_res['Coverage']),
    np.mean(disj_res['Separation Error']),
    disj_time,
]
disj_res

Disjunctive explanation runtime: 2.6881487369537354


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
Not-in-family,marital-status != Married-civ-spouse,1.0,0.53
Husband,marital-status == Married-civ-spouse,1.0,0.12
Own-child,marital-status != Married-civ-spouse OR (17.0 <= age <= 35.25),1.0,0.77
Own-child,(17.0 <= age <= 35.25),0.9,0.7
Own-child,(17.0 <= age <= 31.6) OR marital-status == Never-married,0.94,0.67
Own-child,(17 <= age <= 28),0.78,0.55
Own-child,marital-status != Married-civ-spouse,0.98,0.72
Own-child,(17.0 <= age <= 35.25) OR marital-status == Never-married,0.95,0.71
Own-child,(17 <= age <= 28) OR marital-status == Never-married,0.93,0.63
Own-child,marital-status == Never-married,0.89,0.58


In [48]:
# Time the conjunctive ('conj') 'many to one' explanation of the two-column
# groupby result; no `labels` argument is passed for groupby results.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
conj_res = multi_attribute_groupby.explain(
    explainer='many to one',
    explanation_form='conj',
    separation_threshold=separation_error,
)
conj_time = time.perf_counter() - start_time
print(f"Conjunctive explanation runtime: {conj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Multi attribute groupby', 'conjunctive'), :] = [
    np.mean(conj_res['Coverage']),
    np.mean(conj_res['Separation Error']),
    conj_time,
]
conj_res

Conjunctive explanation runtime: 1.185699462890625


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
"('Male', '<=50K')",0.0 <= capital-gain <= 19999.8,1.0,0.53
"('Male', '<=50K')",0 <= capital-gain <= 4932.5,0.99,0.51
"('Male', '<=50K')",0 <= capital-gain <= 4668.5,0.99,0.51
"('Male', '<=50K')",relationship != infrequent_sklearn AND 0 <= capital-gain <= 4668.5,0.96,0.49
"('Male', '<=50K')",relationship != infrequent_sklearn AND 0 <= capital-gain <= 4932.5,0.96,0.49
"('Male', '<=50K')",0.0 <= capital-gain <= 24999.75,1.0,0.53
"('Male', '<=50K')",0.0 <= capital-gain <= 16666.5,1.0,0.53
"('Male', '<=50K')",0.0 <= capital-gain <= 49999.5,1.0,0.53
"('Male', '<=50K')",0.0 <= capital-gain <= 33333.0,1.0,0.53
"('Male', '<=50K')",0 <= capital-gain <= 99999,1.0,0.53


In [49]:
# Time the disjunctive ('disj') 'many to one' explanation of the two-column
# groupby result; no `labels` argument is passed for groupby results.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
disj_res = multi_attribute_groupby.explain(
    explainer='many to one',
    explanation_form='disj',
    separation_threshold=separation_error,
)
disj_time = time.perf_counter() - start_time
print(f"Disjunctive explanation runtime: {disj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('Multi attribute groupby', 'disjunctive'), :] = [
    np.mean(disj_res['Coverage']),
    np.mean(disj_res['Separation Error']),
    disj_time,
]
disj_res

Disjunctive explanation runtime: 1.568009376525879


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
"('Male', '<=50K')",(0 <= capital-gain <= 4668.5),0.99,0.51
"('Male', '<=50K')",(0.0 <= capital-gain <= 19999.8),1.0,0.53
"('Male', '<=50K')",(0 <= capital-gain <= 4932.5),0.99,0.51
"('Male', '<=50K')",(0.0 <= capital-gain <= 49999.5),1.0,0.53
"('Male', '<=50K')",(0.0 <= capital-gain <= 24999.75),1.0,0.53
"('Male', '<=50K')",(0 <= capital-gain <= 99999),1.0,0.53
"('Male', '<=50K')",(0.0 <= capital-gain <= 16666.5),1.0,0.53
"('Male', '<=50K')",(0.0 <= capital-gain <= 33333.0),1.0,0.53
"('Female', '<=50K')",relationship != Husband,1.0,0.5
"('Male', '>50K')",relationship != infrequent_sklearn,1.0,0.78


### Clustering test

#### 3 clusters

In [50]:
# Assign every Adults row to one of 3 k-means clusters, computed on the one-hot
# encoded frame. random_state pins the centroid initialisation so the cluster
# labels (and the explanations derived from them) are the same on every run.
# NOTE(review): the features are neither scaled nor filtered before clustering
# (the label column is included) — confirm this is intended.
cluster_labels = KMeans(n_clusters=3, random_state=42).fit_predict(pd.get_dummies(adults))

In [51]:
# Time the conjunctive ('conj') 'many to one' explanation of the current
# `cluster_labels` assignment over the full Adults frame.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
conj_res = adults.explain(
    explainer='many to one',
    labels=cluster_labels,
    explanation_form='conj',
    separation_threshold=separation_error,
)
conj_time = time.perf_counter() - start_time
print(f"Conjunctive explanation runtime: {conj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('3 clusters', 'conjunctive'), :] = [
    np.mean(conj_res['Coverage']),
    np.mean(conj_res['Separation Error']),
    conj_time,
]
conj_res

Conjunctive explanation runtime: 2.1946029663085938


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
0,149630.5 <= fnlwgt <= 291625.5,1.0,0.0
1,12285 <= fnlwgt <= 149630.5,1.0,0.0
2,291625.5 <= fnlwgt <= 1490400,1.0,0.0


In [52]:
# Time the disjunctive ('disj') 'many to one' explanation of the current
# `cluster_labels` assignment over the full Adults frame.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
disj_res = adults.explain(
    explainer='many to one',
    labels=cluster_labels,
    explanation_form='disj',
    separation_threshold=separation_error,
)
disj_time = time.perf_counter() - start_time
print(f"Disjunctive explanation runtime: {disj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('3 clusters', 'disjunctive'), :] = [
    np.mean(disj_res['Coverage']),
    np.mean(disj_res['Separation Error']),
    disj_time,
]
disj_res

Disjunctive explanation runtime: 3.3409926891326904


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
0,(149630.5 <= fnlwgt <= 291625.5),1.0,0.0
1,(12285 <= fnlwgt <= 149630.5),1.0,0.0
2,(291625.5 <= fnlwgt <= 1490400),1.0,0.0


#### 15 clusters

In [53]:
# Assign every Adults row to one of 15 k-means clusters, computed on the one-hot
# encoded frame. random_state pins the centroid initialisation so the cluster
# labels (and the explanations derived from them) are the same on every run.
# NOTE(review): the features are neither scaled nor filtered before clustering
# (the label column is included) — confirm this is intended.
cluster_labels = KMeans(n_clusters=15, random_state=42).fit_predict(pd.get_dummies(adults))

In [54]:
# Time the conjunctive ('conj') 'many to one' explanation of the current
# `cluster_labels` assignment over the full Adults frame.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
conj_res = adults.explain(
    explainer='many to one',
    labels=cluster_labels,
    explanation_form='conj',
    separation_threshold=separation_error,
)
conj_time = time.perf_counter() - start_time
print(f"Conjunctive explanation runtime: {conj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('15 clusters', 'conjunctive'), :] = [
    np.mean(conj_res['Coverage']),
    np.mean(conj_res['Separation Error']),
    conj_time,
]
conj_res

Conjunctive explanation runtime: 11.032832860946655


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
0,58263.5 <= fnlwgt <= 93908.0,1.0,0.0
1,281946.5 <= fnlwgt <= 1490400,1.0,0.65
2,178147 <= fnlwgt <= 237646,1.0,0.43
2,178147 <= fnlwgt <= 210736,0.96,0.18
3,433247.5 <= fnlwgt <= 1490400 AND 17.0 <= age <= 41.33333333333333,0.74,0.34
3,433247.5 <= fnlwgt <= 1490400,1.0,0.36
3,433247.5 <= fnlwgt <= 1490400 AND 17.0 <= age <= 53.5,0.9,0.35
4,673088.0 <= fnlwgt <= 1490400,1.0,0.16
5,210736 <= fnlwgt <= 279173,1.0,0.43
5,210736 <= fnlwgt <= 279173 AND occupation != Prof-specialty,0.89,0.42


In [55]:
# Time the disjunctive ('disj') 'many to one' explanation of the current
# `cluster_labels` assignment over the full Adults frame.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
start_time = time.perf_counter()
disj_res = adults.explain(
    explainer='many to one',
    labels=cluster_labels,
    explanation_form='disj',
    separation_threshold=separation_error,
)
disj_time = time.perf_counter() - start_time
print(f"Disjunctive explanation runtime: {disj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
adults_comparison_df.loc[('15 clusters', 'disjunctive'), :] = [
    np.mean(disj_res['Coverage']),
    np.mean(disj_res['Separation Error']),
    disj_time,
]
disj_res

Disjunctive explanation runtime: 10.4983069896698


Unnamed: 0_level_0,Unnamed: 1_level_0,Coverage,Separation Error
Group / Cluster,Explanation,Unnamed: 2_level_1,Unnamed: 3_level_1
0,(58263.5 <= fnlwgt <= 93908.0),1.0,0.0
1,(281946.5 <= fnlwgt <= 1490400),1.0,0.65
2,(178147 <= fnlwgt <= 210736),0.96,0.18
2,(178147 <= fnlwgt <= 237646),1.0,0.43
3,(433247.5 <= fnlwgt <= 1490400),1.0,0.36
4,(673088.0 <= fnlwgt <= 1490400),1.0,0.16
5,(210736 <= fnlwgt <= 279173),1.0,0.43
6,(93908.0 <= fnlwgt <= 126201.5),1.0,0.0
7,(12285 <= fnlwgt <= 58263.5),1.0,0.0
8,(371701.5 <= fnlwgt <= 1490400),1.0,0.44


### Summary of results

In [56]:
# Display the per-test averages and runtimes collected by the Adults test cells.
adults_comparison_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Average Coverage,Average Separation Error,Runtime
Test,Explanation Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Full dataset,conjunctive,0.9075,0.3375,1.249233
Full dataset,disjunctive,0.968571,0.35,1.691773
Numeric only,conjunctive,0.882667,0.41,1.163219
Numeric only,disjunctive,0.962941,0.421176,2.143447
Categorical only,conjunctive,0.86,0.313333,0.477348
Categorical only,disjunctive,0.906667,0.33,0.606179
Single attribute groupby,conjunctive,0.875312,0.482813,2.243629
Single attribute groupby,disjunctive,0.924615,0.605385,2.688149
Multi attribute groupby,conjunctive,0.984667,0.532667,1.185699
Multi attribute groupby,disjunctive,0.988182,0.548182,1.568009


## Spotify dataset

In [57]:
# Load the Spotify dataset from the repository's Examples folder.
# The path uses forward slashes so it resolves on Windows, Linux and macOS alike
# (a backslash-separated path is only valid on Windows).
spotify = pd.read_csv("../Examples/Datasets/spotify_all.csv")
spotify

Unnamed: 0,acousticness,artists,danceability,energy,explicit,id,instrumentalness,key,liveness,loudness,...,name,popularity,speechiness,tempo,valence,year,decade,popularity_score,main_artist,duration_minutes
0,0.991000,['Mamie Smith'],0.598,0.224,0,0cS0A1fUEUd1EW3FcF8AEI,0.000522,5,0.3790,-12.628,...,Keep A Song In Your Soul,12,0.0936,149.976,0.6340,1920,1920,10,Mamie Smith,2.805550
1,0.643000,"[""Screamin' Jay Hawkins""]",0.852,0.517,0,0hbkKFIJm7Z05H8Zl9w30f,0.026400,5,0.0809,-7.261,...,I Put A Spell On You,7,0.0534,86.889,0.9500,1920,1920,0,Screamin' Jay Hawkins,2.503333
2,0.993000,['Mamie Smith'],0.647,0.186,0,11m7laMUgmOKqI3oYzuhne,0.000018,0,0.5190,-12.098,...,Golfing Papa,4,0.1740,97.600,0.6890,1920,1920,0,Mamie Smith,2.730450
3,0.000173,['Oscar Velazquez'],0.730,0.798,0,19Lc5SfJJ5O1oaxY0fpwfh,0.801000,2,0.1280,-7.311,...,True House Music - Xavier Santos & Carlos Gomi...,17,0.0425,127.997,0.0422,1920,1920,10,Oscar Velazquez,7.034783
4,0.295000,['Mixe'],0.704,0.707,1,2hJjbsLCytGsnAHfdsLejp,0.000246,10,0.4020,-6.036,...,Xuniverxe,2,0.0768,122.076,0.2990,1920,1920,0,Mixe,2.753733
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174384,0.009170,"['DJ Combo', 'Sander-7', 'Tony T']",0.792,0.866,0,46LhBf6TvYjZU2SMvGZAbn,0.000060,6,0.1780,-5.089,...,The One,0,0.0356,125.972,0.1860,2020,2020,0,DJ Combo,2.460250
174385,0.795000,['Alessia Cara'],0.429,0.211,0,7tue2Wemjd0FZzRtDrQFZd,0.000000,4,0.1960,-11.665,...,A Little More,0,0.0360,94.710,0.2280,2021,2020,0,Alessia Cara,2.412000
174386,0.806000,['Roger Fly'],0.671,0.589,0,48Qj61hOdYmUCFJbpQ29Ob,0.920000,4,0.1130,-12.393,...,Together,0,0.0282,108.058,0.7140,2020,2020,0,Roger Fly,3.635783
174387,0.920000,['Taylor Swift'],0.462,0.240,1,1gcyHQpBQ1lfXGdhZmWrHP,0.000000,0,0.1130,-12.077,...,champagne problems,69,0.0377,171.319,0.3200,2021,2020,60,Taylor Swift,4.066667


In [58]:
# Empty results table for the Spotify experiments. Rows are keyed by a two-level
# index (test name, explanation type) and are appended by the test cells below.
spotify_comparison_df = pd.DataFrame(
    index=pd.MultiIndex(
        names=['Test', 'Explanation Type'],
        codes=[[], []],
        levels=[[], []],
    ),
    columns=['Average Coverage', 'Average Separation Error', 'Runtime'],
)

### Full dataset test

In [59]:
# Time the conjunctive ('conj') 'many to one' explanation of the `key` column
# over the full Spotify frame.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
# NOTE(review): unlike the Adults cells, no separation_threshold is passed here,
# so the explainer's own default applies — confirm this is intentional.
start_time = time.perf_counter()
conj_res = spotify.explain(
    explainer='many to one',
    labels='key',
    explanation_form='conj',
)
conj_time = time.perf_counter() - start_time
print(f"Conjunctive explanation runtime: {conj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
spotify_comparison_df.loc[('Full dataset', 'conjunctive'), :] = [
    np.mean(conj_res['Coverage']),
    np.mean(conj_res['Separation Error']),
    conj_time,
]
conj_res

KeyboardInterrupt: 

In [34]:
# Time the disjunctive ('disj') 'many to one' explanation of the `key` column
# over the full Spotify frame.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() reads the wall clock, which can be adjusted mid-run.
# NOTE(review): unlike the Adults cells, no separation_threshold is passed here,
# so the explainer's own default applies — confirm this is intentional.
start_time = time.perf_counter()
disj_res = spotify.explain(
    explainer='many to one',
    labels='key',
    explanation_form='disj',
)
disj_time = time.perf_counter() - start_time
print(f"Disjunctive explanation runtime: {disj_time}")
# Record the mean of each quality column plus the runtime in the summary table.
spotify_comparison_df.loc[('Full dataset', 'disjunctive'), :] = [
    np.mean(disj_res['Coverage']),
    np.mean(disj_res['Separation Error']),
    disj_time,
]
disj_res

KeyboardInterrupt: 

In [83]:
# Display the per-test averages and runtimes collected for the Spotify tests.
# NOTE(review): the saved output of this cell shows NaN quality metrics and the
# two Spotify test cells ended in KeyboardInterrupt — re-run from a fresh kernel
# to refresh these results.
spotify_comparison_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Average Coverage,Average Separation Error,Runtime
Test,Explanation Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Full dataset,conjunctive,,,28.525063
Full dataset,disjunctive,,,39.150858


### Numeric only and categorical only tests

In [None]:
# Split the Spotify frame into attribute subsets for the next tests; the label
# column is kept separately so it can be passed to `explain` as its own series.
# These assignments rebind the `numeric_only`, `categorical_only` and `labels`
# names used earlier for the Adults dataset.
numeric_only = spotify[['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']]
# NOTE(review): `year` appears as an integer in the preview above yet is grouped
# with the categorical columns — confirm this is intended.
categorical_only = spotify[['name', 'main_artist', 'year']]
# NOTE(review): the full-dataset Spotify test explains `key`, while this uses
# `decade` as the label — confirm which target is intended.
labels = spotify['decade']