# Kiwi-Aussie Classification with IREP

In [1]:
import pandas as pd
import numpy as np

### Define our features and their distributions

In [2]:
# Every categorical feature of a person, mapped to the tuple of values it can take.
# 'Nationality' is the class label; the remaining entries are the predictors.
# The order of the options in each tuple lines up with the probability tuples
# in kiwi_dist and aussie_dist.
features = {
    'Nationality':    ('Kiwi', 'Aussie'),
    'Funny_Accent':   ('Yes', 'No'),
    'Waltzing':       ('Yes', 'No'),
    'AU_Anthem':      ('Yes', 'No', 'Some'),
    'Six':            ('Properly', 'Seeks'),
    'Fish_and_Chips': ('Normal', 'Feessh_and_Cheeps', 'Fush_and_Chups'),
    'Favourite':      ('Vegemite', 'Marmite', 'Butter', 'Marmalade'),
    'Owns_X_Sheep':   ('One', 'Two', 'Three', 'Many'),
    'Can_Fly':        ('Yes', 'No', 'Briefly'),
}
                
# Probability of each option for a Kiwi, keyed by feature name.
# Each tuple is positionally aligned with the options tuple of the same key
# in `features`, and sums to 1.
kiwi_dist = {
    'Nationality':    (1, 0),
    'Funny_Accent':   (0.8, 0.2),
    'Waltzing':       (0.6, 0.4),
    'AU_Anthem':      (0.05, 0.90, 0.05),
    'Six':            (0.8, 0.20),
    'Fish_and_Chips': (0.4, 0.15, 0.45),
    'Favourite':      (0.35, 0.40, 0.10, 0.15),
    'Owns_X_Sheep':   (0.1, 0.3, 0.4, 0.2),
    'Can_Fly':        (0, 0.9, 0.1),
}
    
# Probability of each option for an Aussie, keyed by feature name.
# Each tuple is positionally aligned with the options tuple of the same key
# in `features`, and sums to 1.
aussie_dist = {
    'Nationality':    (0, 1),
    'Funny_Accent':   (0.9, 0.1),
    'Waltzing':       (0.7, 0.3),
    'AU_Anthem':      (0.05, 0.85, 0.1),
    'Six':            (0.4, 0.6),
    'Fish_and_Chips': (0.3, 0.05, 0.65),
    'Favourite':      (0.5, 0.25, 0.15, 0.1),
    'Owns_X_Sheep':   (0.2, 0.5, 0.1, 0.2),
    'Can_Fly':        (0, 1, 0),
}
             

### Generate Kiwis and Aussies

Generate arrays of random Kiwis and Aussies by sampling each feature according to its distribution:

In [3]:
# Number of people to generate for each nationality.
n_kiwis = 100
n_aussies = 100

# Sample from a dedicated, seeded generator so that re-running this cell (or the
# whole notebook) always produces the same people. Without a seed the dataset,
# and therefore every ruleset and metric further down, changes on each run.
# 42 matches the seed used by the later steps of this notebook.
rng = np.random.RandomState(42)

# For each feature, draw one value per person using that nationality's
# probabilities. The result is a list with one array per feature (in the key
# order of `features`), each of length n_kiwis / n_aussies.
kiwis = [rng.choice(options, n_kiwis, p=kiwi_dist[feature])
         for feature, options in features.items()]

aussies = [rng.choice(options, n_aussies, p=aussie_dist[feature])
           for feature, options in features.items()]
              

In [4]:
# Convert each population to a 2-D array and transpose it, then rebind the
# same names to the transposed arrays.
# NOTE: this cell rebinds its own inputs, so running it a second time would
# transpose the arrays back again.
kiwis, aussies = (np.array(people).T for people in (kiwis, aussies))

# Stack the Kiwi rows on top of the Aussie rows (concatenation along axis 0).
everyone = np.concatenate([kiwis, aussies])

Load everyone into a pandas DataFrame, one row per person and one named column per feature.

In [5]:
# Column labels are listed by hand, in the same order as the keys of `features`.
# Two labels are spelled differently from those keys ('Favorite' vs 'Favourite',
# 'Own_X_Sheep' vs 'Owns_X_Sheep'); the DataFrame labels are the ones that show
# up in the rulesets further down.
df = pd.DataFrame(
    data=everyone,
    columns=[
        'Nationality', 'Funny_Accent', 'Waltzing', 'AU_Anthem',
        'Six', 'Fish_and_Chips', 'Favorite',
        'Own_X_Sheep', 'Can_Fly',
    ],
)

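Shuffle the rows of the DataFrame so the two nationalities are interleaved (the fixed `random_state` keeps the shuffle reproducible):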

In [6]:
# Sampling 100% of the rows without replacement is a shuffle; the original
# index labels are kept. NOTE: `df` is rebound to its own shuffled copy, so
# re-running this cell shuffles the already-shuffled frame again.
df = df.sample(
    frac=1.0,
    random_state=42,
)

In [7]:
# Preview the first ten shuffled rows.
df.head(n=10)

Unnamed: 0,Nationality,Funny_Accent,Waltzing,AU_Anthem,Six,Fish_and_Chips,Favorite,Own_X_Sheep,Can_Fly
95,Kiwi,Yes,No,Yes,Seeks,Feessh_and_Cheeps,Marmite,Three,No
15,Kiwi,Yes,No,Some,Properly,Normal,Marmalade,Three,No
30,Kiwi,Yes,Yes,No,Properly,Fush_and_Chups,Vegemite,Two,No
158,Aussie,Yes,Yes,No,Properly,Normal,Marmalade,Many,No
128,Aussie,Yes,No,No,Properly,Fush_and_Chups,Vegemite,One,No
115,Aussie,Yes,Yes,No,Properly,Normal,Butter,One,No
69,Kiwi,Yes,Yes,No,Properly,Fush_and_Chups,Vegemite,Three,No
170,Aussie,Yes,Yes,No,Seeks,Fush_and_Chups,Marmite,Two,No
174,Aussie,Yes,No,No,Properly,Fush_and_Chups,Vegemite,Many,No
45,Kiwi,Yes,Yes,No,Properly,Feessh_and_Cheeps,Vegemite,Three,No


### Bring in IREP

In [8]:
import os
import sys

# Directory that contains the IREP module. It can be overridden with the
# IREP_PATH environment variable so the notebook also runs on machines other
# than the author's; the original location stays as the default.
IREP_DIR = os.environ.get('IREP_PATH', '/Users/ilanmoscovitz/Documents/Python')

# Guarded so that re-running this cell does not pile up duplicate entries.
if IREP_DIR not in sys.path:
    sys.path.insert(0, IREP_DIR)

import IREP

Split our data into training and test sets:

In [9]:
# Split the shuffled frame into `train` and `test` using IREP's own helper.
# NOTE(review): test_percent=.33 presumably holds out about a third of the rows
# and seed=42 presumably fixes the split — confirm against IREP.train_test_split.
train, test = IREP.train_test_split(df, test_percent=.33, seed=42)

Grow a ruleset without pruning:

In [10]:
# Fit on the training split only.
# NOTE(review): display=True presumably produces the "Remaining pos / Update /
# Grown" progress log printed below this cell — confirm against IREP.ire.
# NOTE(review): no seed argument is passed here, unlike the other IREP calls in
# this notebook — confirm that IREP.ire involves no randomness.
ire_ruleset = IREP.ire(df=train, display=True, show_gain=False, sleep=False)
# Bare expression so the notebook renders the resulting ruleset.
ire_ruleset

Remaining pos: 61
Update: []
Update: [('Own_X_Sheep', 'Three')]
Update: [('Own_X_Sheep', 'Three'), ('Six', 'Properly')]
Grown: [('Own_X_Sheep', 'Three'), ('Six', 'Properly'), ('Fish_and_Chips', 'Normal')] 

Remaining pos: 48
Update: []
Update: [('Six', 'Properly')]
Update: [('Six', 'Properly'), ('Own_X_Sheep', 'Three')]
Grown: [('Six', 'Properly'), ('Own_X_Sheep', 'Three'), ('Favorite', 'Vegemite')] 

Remaining pos: 42
Update: []
Update: [('Six', 'Properly')]
Update: [('Six', 'Properly'), ('Own_X_Sheep', 'Two')]
Grown: [('Six', 'Properly'), ('Own_X_Sheep', 'Two'), ('Favorite', 'Marmite')] 

Remaining pos: 36
Update: []
Update: [('Favorite', 'Marmalade')]
Grown: [('Favorite', 'Marmalade'), ('Waltzing', 'No')] 

Remaining pos: 29
Update: []
Update: [('Six', 'Properly')]
Grown: [('Six', 'Properly'), ('Can_Fly', 'Briefly')] 

Remaining pos: 25
Update: []
Update: [('Own_X_Sheep', 'Three')]
Grown: [('Own_X_Sheep', 'Three'), ('Waltzing', 'No')] 

Remaining pos: 21
Update: []
Update: [('Six', 

[[('Own_X_Sheep', 'Many'),
  ('Six', 'Properly'),
  ('Favorite', 'Vegemite'),
  ('Waltzing', 'Yes')],
 [('Six', 'Properly'),
  ('Favorite', 'Vegemite'),
  ('Waltzing', 'Yes'),
  ('Fish_and_Chips', 'Fush_and_Chups'),
  ('Own_X_Sheep', 'Two'),
  ('Funny_Accent', 'Yes')],
 [('Favorite', 'Vegemite'),
  ('Own_X_Sheep', 'Many'),
  ('Waltzing', 'No'),
  ('Six', 'Seeks')],
 [('Own_X_Sheep', 'Three'), ('Funny_Accent', 'No')],
 [('Funny_Accent', 'No'), ('Favorite', 'Marmite'), ('Waltzing', 'No')],
 [('Fish_and_Chips', 'Fush_and_Chups'),
  ('Six', 'Properly'),
  ('Own_X_Sheep', 'One')],
 [('Six', 'Properly'),
  ('Favorite', 'Vegemite'),
  ('Waltzing', 'Yes'),
  ('Own_X_Sheep', 'Many'),
  ('Fish_and_Chips', 'Normal')],
 [('Own_X_Sheep', 'Many'),
  ('Fish_and_Chips', 'Normal'),
  ('Funny_Accent', 'Yes'),
  ('Six', 'Seeks'),
  ('Favorite', 'Marmalade')],
 [('Six', 'Properly'), ('Own_X_Sheep', 'Three'), ('Favorite', 'Butter')],
 [('Own_X_Sheep', 'Many'), ('Fish_and_Chips', 'Normal'), ('Waltzing', 'No

Now grow a ruleset with pruning (IREP):

In [12]:
# Fit on the training split only, this time with a fixed seed.
# NOTE(review): display=True presumably produces the "Grown / Pruned to /
# Updated ruleset" progress log printed below this cell, and seed=42 presumably
# fixes an internal grow/prune split — confirm against IREP.irep.
irep_ruleset = IREP.irep(df=train, display=True, show_gain=False, sleep=False, seed=42)
# Bare expression so the notebook renders the resulting ruleset.
irep_ruleset

Update: []
Update: [('Own_X_Sheep', 'Three')]
Grown: [('Own_X_Sheep', 'Three'), ('Six', 'Properly')]
Pruned to: [('Own_X_Sheep', 'Three')]
Updated ruleset: [[('Own_X_Sheep', 'Three')]] 

Update: []
Update: [('Six', 'Properly')]
Update: [('Six', 'Properly'), ('Own_X_Sheep', 'Two')]
Grown: [('Six', 'Properly'), ('Own_X_Sheep', 'Two'), ('Favorite', 'Marmite')]
Pruned to: [('Six', 'Properly'), ('Own_X_Sheep', 'Two')]
Updated ruleset: [[('Own_X_Sheep', 'Three')], [('Six', 'Properly'), ('Own_X_Sheep', 'Two')]] 

Update: []
Update: [('Waltzing', 'No')]
Grown: [('Waltzing', 'No'), ('Funny_Accent', 'No')]
Pruned to: [('Waltzing', 'No'), ('Funny_Accent', 'No')]
Updated ruleset: [[('Own_X_Sheep', 'Three')], [('Six', 'Properly'), ('Own_X_Sheep', 'Two')], [('Waltzing', 'No'), ('Funny_Accent', 'No')]] 

Update: []
Update: [('Favorite', 'Marmalade')]
Grown: [('Favorite', 'Marmalade'), ('Waltzing', 'No')]
Pruned to: [('Favorite', 'Marmalade')]
Updated ruleset: [[('Own_X_Sheep', 'Three')], [('Six', 'Pr

[[('Own_X_Sheep', 'Three')],
 [('Six', 'Properly'), ('Own_X_Sheep', 'Two')],
 [('Waltzing', 'No'), ('Funny_Accent', 'No')],
 [('Favorite', 'Marmalade')]]

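Now run the whole experiment repeatedly (`n=10`, starting from `first_seed=42`) to see how much the selected ruleset varies: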

In [14]:
# NOTE(review): this passes the full `df` rather than `train` — presumably
# IREP.experiment performs its own split for each of the n=10 runs, with seeds
# derived from first_seed=42; confirm against IREP.experiment.
experiment_results = IREP.experiment(df, n=10, first_seed=42)

In [15]:
# Render the results table (one row per distinct ruleset, per the output below).
experiment_results

Unnamed: 0_level_0,Precision,Recall,Freq_Selected
Ruleset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"[[(Own_X_Sheep, Three)]]",0.860133,0.369925,0.4
"[[(Six, Properly), (Own_X_Sheep, Three)]]",1.0,0.313492,0.2
"[[(Six, Properly), (Own_X_Sheep, Two), (Favorite, Marmite)], [(Own_X_Sheep, Three)], [(Own_X_Sheep, Many), (Favorite, Marmalade)], [(Funny_Accent, No), (Fish_and_Chips, Normal)]]",0.8125,0.83871,0.1
"[[(Six, Properly), (Own_X_Sheep, Two), (Favorite, Marmite)], [(Own_X_Sheep, Three)]]",0.823529,0.518519,0.1
"[[(Six, Properly), (Own_X_Sheep, Two), (Favorite, Marmite)], [(Six, Properly), (Can_Fly, Briefly)], [(Own_X_Sheep, Three)], [(Favorite, Marmalade)]]",0.833333,0.769231,0.1
"[[(Waltzing, No), (Six, Properly), (Funny_Accent, No)], [(Six, Properly), (Own_X_Sheep, Two), (Favorite, Marmite)], [(Own_X_Sheep, Three)], [(Favorite, Marmalade)]]",0.76,0.655172,0.1


In [None]:
# NOTE(review): this cell has no execution count in the saved notebook, so no
# output was recorded for it. Presumably summarises the experiment results —
# confirm against IREP.exp_performance.
IREP.exp_performance(experiment_results)

In [None]:
# NOTE(review): this cell has no execution count in the saved notebook, so no
# output was recorded for it. Presumably returns the held-out `test` rows matched
# by the pruned ruleset — confirm against IREP.ruleset_covers.
IREP.ruleset_covers(irep_ruleset,test)