In [1]:
# Bring pip itself up to date, then install `ucimlrepo`, the package the next
# cell imports `fetch_ucirepo` from. `%pip` (rather than `!pip`) installs into
# the environment of the running kernel.
%pip install --upgrade pip
%pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
from ucimlrepo import fetch_ucirepo 
from pprint import pp
import pandas

# Download dataset 73 from the UCI ML repository (network access required).
mushroom = fetch_ucirepo(id=73)

# Shared separators so every section of the overview is printed the same way.
sectionRule = '******************************************************************'
sectionGap = '\n\n'

# Column names of the full frame, the target frame and the feature frame.
for sectionTitle, sectionFrame in (
    ('original', mushroom.data.original),
    ('targets', mushroom.data.targets),
    ('features', mushroom.data.features),
):
    print(sectionTitle)
    print(sectionRule)
    print(sectionFrame.columns.values)
    print(sectionGap)

# Dataset-level metadata, pretty-printed because it is a nested structure.
print('metadata')
print(sectionRule)
pp(mushroom.metadata)
print(sectionGap)

# Per-variable description; last section, so no trailing gap is printed.
print('variables')
print(sectionRule)
pp(mushroom.variables)

original
******************************************************************
['cap-shape' 'cap-surface' 'cap-color' 'bruises' 'odor' 'gill-attachment'
 'gill-spacing' 'gill-size' 'gill-color' 'stalk-shape' 'stalk-root'
 'stalk-surface-above-ring' 'stalk-surface-below-ring'
 'stalk-color-above-ring' 'stalk-color-below-ring' 'veil-type'
 'veil-color' 'ring-number' 'ring-type' 'spore-print-color' 'population'
 'habitat' 'poisonous']



targets
******************************************************************
['poisonous']



features
******************************************************************
['cap-shape' 'cap-surface' 'cap-color' 'bruises' 'odor' 'gill-attachment'
 'gill-spacing' 'gill-size' 'gill-color' 'stalk-shape' 'stalk-root'
 'stalk-surface-above-ring' 'stalk-surface-below-ring'
 'stalk-color-above-ring' 'stalk-color-below-ring' 'veil-type'
 'veil-color' 'ring-number' 'ring-type' 'spore-print-color' 'population'
 'habitat']



metadata
***************************************

In [3]:
# Names of the feature columns, in the order the dataset provides them.
features = mushroom.data.features.columns.values
featureCount = len(features)

# For each feature, the distinct values observed in its column
# (a missing value shows up as NaN among them).
featureValues = {
    name: mushroom.data.features[name].unique()
    for name in features
}

# Every unordered pair of distinct features, with the earlier column first.
featurePairs = [
    [first, second]
    for position, first in enumerate(features)
    for second in features[position + 1:]
]

print('feature values')
print('***************************************')
pp(featureValues)
print('\n\n')

print('feature pairs: ', len(featurePairs))
print('********************************************')
pp(featurePairs)

feature values
***************************************
{'cap-shape': array(['x', 'b', 's', 'f', 'k', 'c'], dtype=object),
 'cap-surface': array(['s', 'y', 'f', 'g'], dtype=object),
 'cap-color': array(['n', 'y', 'w', 'g', 'e', 'p', 'b', 'u', 'c', 'r'], dtype=object),
 'bruises': array(['t', 'f'], dtype=object),
 'odor': array(['p', 'a', 'l', 'n', 'f', 'c', 'y', 's', 'm'], dtype=object),
 'gill-attachment': array(['f', 'a'], dtype=object),
 'gill-spacing': array(['c', 'w'], dtype=object),
 'gill-size': array(['n', 'b'], dtype=object),
 'gill-color': array(['k', 'n', 'g', 'p', 'w', 'h', 'u', 'e', 'b', 'r', 'y', 'o'],
      dtype=object),
 'stalk-shape': array(['e', 't'], dtype=object),
 'stalk-root': array(['e', 'c', 'b', 'r', nan], dtype=object),
 'stalk-surface-above-ring': array(['s', 'f', 'k', 'y'], dtype=object),
 'stalk-surface-below-ring': array(['s', 'f', 'y', 'k'], dtype=object),
 'stalk-color-above-ring': array(['w', 'g', 'p', 'n', 'b', 'e', 'o', 'c', 'y'], dtype=object),
 'sta

In [8]:
# For every pair of features and every combination of their observed values,
# count how many edible ('e') and poisonous ('p') mushrooms show that
# combination. Combinations that never occur are kept with 0 / 0 counts.
featureRecords = list()

for feature1, feature2 in featurePairs:
    # Number of rows per (feature1 value, feature2 value, class label).
    # NOTE: groupby drops rows whose key is NaN by default, so combinations
    # involving a missing value (e.g. NaN in 'stalk-root') end up as 0 / 0.
    occurrences = mushroom.data.original.groupby([feature1, feature2, 'poisonous']).size()

    # Flatten once to {(value1, value2, label): count}; each combination below
    # is then a plain dict lookup instead of repeated chained Series indexing.
    occurrenceCounts = occurrences.to_dict()

    for value1 in featureValues[feature1]:
        for value2 in featureValues[feature2]:
            featureRecords.append({
                'feature1_name': feature1,
                'feature1_value': value1,
                'feature2_name': feature2,
                'feature2_value': value2,
                'edible_count': occurrenceCounts.get((value1, value2, 'e'), 0),
                'poisonous_count': occurrenceCounts.get((value1, value2, 'p'), 0)
            })

# One row per (feature pair, value combination).
featureData = pandas.DataFrame(featureRecords)

# Value combinations seen exclusively in one class.
edibleOnly = featureData.query('edible_count > 0 and poisonous_count == 0')
poisonousOnly = featureData.query('edible_count == 0 and poisonous_count > 0')

# Class totals over the whole dataset, shown next to the sums for reference.
expectedCounts = mushroom.data.original.groupby('poisonous').size()

# NOTE: the sums below add counts across all feature pairs, so a mushroom is
# counted once for every qualifying combination it matches. They are therefore
# expected to be larger than the class totals, not equal to them.
print('edible-only features')
print('number of edible mushrooms: ', expectedCounts['e'])
print('sum of edible_count: ', edibleOnly['edible_count'].sum())
print('********************************************************************************************************')
display(edibleOnly)
print('\n\n')

print('poisonous-only features')
print('number of poisonous mushrooms: ', expectedCounts['p'])
print('sum of poisonous_count: ', poisonousOnly['poisonous_count'].sum())
print('********************************************************************************************************')
display(poisonousOnly)

edible-only features
number of edible mushrooms:  4208
sum of edible_count:  171596
********************************************************************************************************


Unnamed: 0,feature1_name,feature1_value,feature2_name,feature2_value,edible_count,poisonous_count
10,cap-shape,s,cap-surface,f,32,0
31,cap-shape,x,cap-color,u,8,0
33,cap-shape,x,cap-color,r,8,0
37,cap-shape,b,cap-color,g,48,0
44,cap-shape,s,cap-color,n,16,0
...,...,...,...,...,...,...
6394,population,n,habitat,g,272,0
6395,population,n,habitat,m,128,0
6401,population,a,habitat,g,384,0
6414,population,y,habitat,u,48,0





poisonous-only features
number of poisonous mushrooms:  3916
sum of poisonous_count:  243033
********************************************************************************************************


Unnamed: 0,feature1_name,feature1_value,feature2_name,feature2_value,edible_count,poisonous_count
7,cap-shape,b,cap-surface,g,0,1
15,cap-shape,f,cap-surface,g,0,1
19,cap-shape,k,cap-surface,g,0,1
21,cap-shape,c,cap-surface,y,0,3
23,cap-shape,c,cap-surface,g,0,1
...,...,...,...,...,...,...
6386,population,s,habitat,u,0,136
6389,population,s,habitat,d,0,96
6408,population,v,habitat,g,0,388
6409,population,v,habitat,m,0,36
