In [12]:
# load data ########################################################################################

import numpy as np
import scipy as sp
import pandas as pd
from IPython.display import display, HTML

df=pd.read_csv('W11_zoo.csv')
cols=df.columns
# print out and display dataframe as tables in HTML
display(HTML(df.head(10).to_html()))

# check if there are any missing values
print('ColumnName, DataType, MissingValues')
for i in cols:
    print(i, ',', df[i].dtype,',',df[i].isnull().any())

# print out and display dataframe as tables in HTML
display(HTML(df.head(10).to_html()))

df_raw=df.copy(deep=True) 

# Pre-processing ###############################################
print('Column data types:\n',df_raw.dtypes)
df_nb=df_raw.copy(deep=True)

print('Column data types:\n',df_nb.dtypes)


# convert numerical to categorical data, e.g., Age #################################

df_nb['hair'] = pd.cut(df_nb['hair'],2)
df_nb['feathers'] = pd.cut(df_nb['feathers'],2)
df_nb['eggs'] = pd.cut(df_nb['eggs'],2)
df_nb['milk'] = pd.cut(df_nb['milk'],2)
df_nb['airborne'] = pd.cut(df_nb['airborne'],2)
df_nb['aquatic'] = pd.cut(df_nb['aquatic'],2)
df_nb['predator'] = pd.cut(df_nb['predator'],2)
df_nb['toothed'] = pd.cut(df_nb['toothed'],2)
df_nb['backbone'] = pd.cut(df_nb['backbone'],2)
df_nb['breathes'] = pd.cut(df_nb['breathes'],2)
df_nb['venomous'] = pd.cut(df_nb['venomous'],2)
df_nb['fins'] = pd.cut(df_nb['fins'],2)
df_nb['legs'] = pd.cut(df_nb['legs'],4)
df_nb['tail'] = pd.cut(df_nb['tail'],2)
df_nb['domestic'] = pd.cut(df_nb['domestic'],2)
df_nb['catsize'] = pd.cut(df_nb['catsize'],2)
df_nb['type'] = pd.cut(df_nb['type'],7)

display('Data Example',HTML(df_nb.head(5).to_html()))

# Association Rule Mining ##########################################################################

# install the mlxtend library first
# To install this package with conda run one of the following:
# conda install -c conda-forge mlxtend 
# conda install -c conda-forge/label/gcc7 mlxtend 
# conda install -c conda-forge/label/cf201901 mlxtend 

# fix install issues in windows
# copy the following dll files
# libcrypto-1_1-x64.*
# libssl-1_1-x64.*
# from D:\Anaconda3\Library\bin to D:\Anaconda3\DLLs

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder
import matplotlib.pyplot as plt  

print(df_nb.dtypes)


# convert all columns to strings


print(df_nb.dtypes)

# convert all columns to strings
df_nb = df_nb.astype(str)
df_nb['gender'] = 'hair=' + df_nb['gender'].astype(str)
df_nb['hair'] = 'hair=' + df_nb['hair'].astype(str)
df_nb['feathers'] = 'feathers=' + df_nb['feathers'].astype(str)
df_nb['eggs'] = 'eggs=' + df_nb['eggs'].astype(str)
df_nb['milk'] = 'milk=' + df_nb['milk'].astype(str)
df_nb['airborne'] = 'airborne=' + df_nb['airborne'].astype(str)
df_nb['aquatic'] = 'aquatic=' + df_nb['aquatic'].astype(str)
df_nb['predator'] = 'predator=' + df_nb['predator'].astype(str)
df_nb['backbone'] = 'backbone=' + df_nb['backbone'].astype(str)
df_nb['breathes'] = 'breathes=' + df_nb['breathes'].astype(str)
df_nb['venomous'] = 'venomous=' + df_nb['venomous'].astype(str)
df_nb['fins'] = 'fins=' + df_nb['fins'].astype(str)
df_nb['legs'] = 'legs=' + df_nb['legs'].astype(str)
df_nb['tail'] = 'tail=' + df_nb['tail'].astype(str)
df_nb['domestic'] = 'domestic=' + df_nb['domestic'].astype(str)
df_nb['catsize'] = 'catsize=' + df_nb['catsize'].astype(str)
df_nb['type'] = 'type=' + df_nb['type'].astype(str)

print(df_nb.dtypes)


# convert data frame to lists
df_arr = df_nb.stack().groupby(level=0).apply(list).tolist()

# Encode lists to transactions
te = TransactionEncoder()
df_transactions = te.fit_transform(df_arr)


# covnert the values to booleans: TRUE and FALSE
df_rules = pd.DataFrame(df_transactions,columns=te.columns_)
display('Data Example',HTML(df_rules.head(5).to_html()))


frequent_itemsets = apriori(df_rules, min_support=0.45, use_colnames=True)
print("My freq Itemsets",frequent_itemsets)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

#display('Rules',HTML(rules.to_html()))
display('Rules',HTML(rules.head(5).to_html()))




Unnamed: 0,name,gender,origin,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,Male,Europe,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,Male,Asia,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,Male,South America,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,Male,North America,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,Female,North America,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
5,buffalo,Female,North America,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
6,calf,Female,Africa,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,1,1
7,carp,Male,Oceania,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,4
8,catfish,Female,Africa,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
9,cavy,Male,South America,1,0,0,1,0,0,0,1,1,1,0,0,4,0,1,0,1


ColumnName, DataType, MissingValues
name , object , False
gender , object , False
origin , object , False
hair , int64 , False
feathers , int64 , False
eggs , int64 , False
milk , int64 , False
airborne , int64 , False
aquatic , int64 , False
predator , int64 , False
toothed , int64 , False
backbone , int64 , False
breathes , int64 , False
venomous , int64 , False
fins , int64 , False
legs , int64 , False
tail , int64 , False
domestic , int64 , False
catsize , int64 , False
type , int64 , False


Unnamed: 0,name,gender,origin,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,Male,Europe,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,Male,Asia,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,Male,South America,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,Male,North America,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,Female,North America,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
5,buffalo,Female,North America,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
6,calf,Female,Africa,1,0,0,1,0,0,0,1,1,1,0,0,4,1,1,1,1
7,carp,Male,Oceania,0,0,1,0,0,1,0,1,1,0,0,1,0,1,1,0,4
8,catfish,Female,Africa,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
9,cavy,Male,South America,1,0,0,1,0,0,0,1,1,1,0,0,4,0,1,0,1


Column data types:
 name        object
gender      object
origin      object
hair         int64
feathers     int64
eggs         int64
milk         int64
airborne     int64
aquatic      int64
predator     int64
toothed      int64
backbone     int64
breathes     int64
venomous     int64
fins         int64
legs         int64
tail         int64
domestic     int64
catsize      int64
type         int64
dtype: object
Column data types:
 name        object
gender      object
origin      object
hair         int64
feathers     int64
eggs         int64
milk         int64
airborne     int64
aquatic      int64
predator     int64
toothed      int64
backbone     int64
breathes     int64
venomous     int64
fins         int64
legs         int64
tail         int64
domestic     int64
catsize      int64
type         int64
dtype: object


'Data Example'

Unnamed: 0,name,gender,origin,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,Male,Europe,"(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(2.0, 4.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(0.994, 1.857]"
1,antelope,Male,Asia,"(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(2.0, 4.0]","(0.5, 1.0]","(-0.001, 0.5]","(0.5, 1.0]","(0.994, 1.857]"
2,bass,Male,South America,"(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(-0.008, 2.0]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(3.571, 4.429]"
3,bear,Male,North America,"(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(2.0, 4.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(0.994, 1.857]"
4,boar,Female,North America,"(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(0.5, 1.0]","(-0.001, 0.5]","(-0.001, 0.5]","(2.0, 4.0]","(0.5, 1.0]","(-0.001, 0.5]","(0.5, 1.0]","(0.994, 1.857]"


name          object
gender        object
origin        object
hair        category
feathers    category
eggs        category
milk        category
airborne    category
aquatic     category
predator    category
toothed     category
backbone    category
breathes    category
venomous    category
fins        category
legs        category
tail        category
domestic    category
catsize     category
type        category
dtype: object
name          object
gender        object
origin        object
hair        category
feathers    category
eggs        category
milk        category
airborne    category
aquatic     category
predator    category
toothed     category
backbone    category
breathes    category
venomous    category
fins        category
legs        category
tail        category
domestic    category
catsize     category
type        category
dtype: object
name        object
gender      object
origin      object
hair        object
feathers    object
eggs        object
milk        object

'Data Example'

Unnamed: 0,"(-0.001, 0.5]","(0.5, 1.0]",Africa,Asia,Europe,North America,Oceania,South America,aardvark,"airborne=(-0.001, 0.5]","airborne=(0.5, 1.0]",antelope,"aquatic=(-0.001, 0.5]","aquatic=(0.5, 1.0]","backbone=(-0.001, 0.5]","backbone=(0.5, 1.0]",bass,bear,boar,"breathes=(-0.001, 0.5]","breathes=(0.5, 1.0]",buffalo,calf,carp,catfish,"catsize=(-0.001, 0.5]","catsize=(0.5, 1.0]",cavy,cheetah,chicken,chub,clam,crab,crayfish,crow,deer,dogfish,dolphin,"domestic=(-0.001, 0.5]","domestic=(0.5, 1.0]",dove,duck,"eggs=(-0.001, 0.5]","eggs=(0.5, 1.0]",elephant,"feathers=(-0.001, 0.5]","feathers=(0.5, 1.0]","fins=(-0.001, 0.5]","fins=(0.5, 1.0]",flamingo,flea,frog,fruitbat,giraffe,girl,gnat,goat,gorilla,gull,haddock,"hair=(-0.001, 0.5]","hair=(0.5, 1.0]",hair=Female,hair=Male,hamster,hare,hawk,herring,honeybee,housefly,kiwi,ladybird,lark,"legs=(-0.008, 2.0]","legs=(2.0, 4.0]","legs=(4.0, 6.0]","legs=(6.0, 8.0]",leopard,lion,lobster,lynx,"milk=(-0.001, 0.5]","milk=(0.5, 1.0]",mink,mole,mongoose,moth,newt,octopus,opossum,oryx,ostrich,parakeet,penguin,pheasant,pike,piranha,pitviper,platypus,polecat,pony,porpoise,"predator=(-0.001, 0.5]","predator=(0.5, 1.0]",puma,pussycat,raccoon,reindeer,rhea,scorpion,seahorse,seal,sealion,seasnake,seawasp,skimmer,skua,slowworm,slug,sole,sparrow,squirrel,starfish,stingray,swan,"tail=(-0.001, 0.5]","tail=(0.5, 1.0]",termite,toad,tortoise,tuatara,tuna,"type=(0.994, 1.857]","type=(1.857, 2.714]","type=(2.714, 3.571]","type=(3.571, 4.429]","type=(4.429, 5.286]","type=(5.286, 6.143]","type=(6.143, 7.0]",vampire,"venomous=(-0.001, 0.5]","venomous=(0.5, 1.0]",vole,vulture,wallaby,wasp,wolf,worm,wren
0,False,True,False,False,True,False,False,False,True,True,False,False,True,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
1,False,True,False,True,False,False,False,False,False,True,False,True,True,False,False,True,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
2,False,True,False,False,False,False,False,True,False,True,False,False,False,True,False,True,True,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False
3,False,True,False,False,False,True,False,False,False,True,False,False,True,False,False,True,False,True,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
4,False,True,False,False,False,True,False,False,False,True,False,False,True,False,False,True,False,False,True,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,True,False,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False


My freq Itemsets       support                                           itemsets
0    0.603960                                       ((0.5, 1.0])
1    0.762376                           (airborne=(-0.001, 0.5])
2    0.643564                            (aquatic=(-0.001, 0.5])
3    0.821782                              (backbone=(0.5, 1.0])
4    0.792079                              (breathes=(0.5, 1.0])
..        ...                                                ...
290  0.455446  (eggs=(0.5, 1.0], domestic=(-0.001, 0.5], veno...
291  0.455446  (feathers=(-0.001, 0.5], domestic=(-0.001, 0.5...
292  0.465347  (feathers=(-0.001, 0.5], tail=(0.5, 1.0], back...
293  0.465347  (tail=(0.5, 1.0], backbone=(0.5, 1.0], venomou...
294  0.465347  (domestic=(-0.001, 0.5], tail=(0.5, 1.0], back...

[295 rows x 2 columns]


'Rules'

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"((0.5, 1.0])","(airborne=(-0.001, 0.5])",0.60396,0.762376,0.584158,0.967213,1.268682,0.123713,7.247525
1,"(airborne=(-0.001, 0.5])","((0.5, 1.0])",0.762376,0.60396,0.584158,0.766234,1.268682,0.123713,1.694169
2,"((0.5, 1.0])","(backbone=(0.5, 1.0])",0.60396,0.821782,0.60396,1.0,1.216867,0.107637,inf
3,"(backbone=(0.5, 1.0])","((0.5, 1.0])",0.821782,0.60396,0.60396,0.73494,1.216867,0.107637,1.494149
4,"((0.5, 1.0])","(breathes=(0.5, 1.0])",0.60396,0.792079,0.465347,0.770492,0.972746,-0.013038,0.905941
