In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up live, without restarting the kernel.
# (Comments stay on their own lines: text after a line magic is parsed as its argument.)
%load_ext autoreload
%autoreload 2

In [2]:
import os.path
import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.datasets.mldata import fetch_mldata
from sklearn.cross_validation import StratifiedKFold

In [3]:
from tabpar import TabDataParser
from reppar import RulesParser
from procrules import ProcRules

from rulstat import RulesStats
from logical import SimpleVoting

In [4]:
# Fetch the "wine" dataset, using ../data as the data_home directory.
data_home = os.path.join("../", "data")
wine_bunch = fetch_mldata("wine", data_home=data_home)

# Pull the feature matrix and the target vector out of the returned bunch.
data = wine_bunch['data']
labels = wine_bunch['target']

# Distinct class labels present in the target vector.
ulabels = np.unique(labels)

In [5]:
# Two-fold stratified split of the labels, without shuffling.
# NOTE(review): random_state presumably has no effect while shuffle=False - confirm.
skf = StratifiedKFold(y=labels, n_folds=2, shuffle=False, random_state=42)

# Keep only the last (train, test) index pair produced by the splitter.
train_idx, test_idx = list(skf)[-1]

In [6]:
# Build every path used by the notebook under ../data in a single pass.
ftrain, ftest, fall, frules = (
    os.path.join("../", "data", fname)
    for fname in (
        "wine-train.tab",    # training split
        "wine-test.tab",     # test split
        "wine-all.tab",      # full dataset
        "wine-lrules.html",  # rules file consumed by RulesParser
    )
)

In [7]:
# Write the train and test subsets (rows selected by the split indices) to .tab files.
for _path, _rows in ((ftrain, train_idx), (ftest, test_idx)):
    TabDataParser.np2tab(_path, data[_rows, :], labels[_rows])

# Write the complete, unsplit dataset as well.
TabDataParser.np2tab(fall, data, labels)

In [8]:
# Parse the rules file and the training table, then combine them into `rules`.
rp = RulesParser(frules)
tp = TabDataParser(ftrain)
rules = ProcRules(tp, rp).rules

# Show how many rules are stored under each key of `rules`.
print([len(rules[label]) for label in rules])

[69, 65, 62]


In [9]:
# Build the voting model from the processed rules.
vote_mdl = SimpleVoting(rules)

In [10]:
# Run the voting model on the test rows. The result `y` is compared element-wise
# against the true test labels in the accuracy cell, so despite the method name,
# `fit` presumably returns one predicted label per row - confirm in logical.SimpleVoting.
y = vote_mdl.fit(data[test_idx, :])

In [11]:
# Accuracy on the test fold: fraction of predictions equal to the true label.
y_true = labels[test_idx]
n_hits = sum(1 for pred, truth in zip(y, y_true) if pred == truth)
print(n_hits / len(y_true))

0.8409090909090909


In [14]:
%pdb

Automatic pdb calling has been turned OFF


In [16]:
# Parse the training .tab file again and compute statistics for `rules` on its data.
data_train = TabDataParser(ftrain)
stats = RulesStats(rules)
# Presumably populates `stats.stats` from the training data - confirm in rulstat.RulesStats.
stats.compute_stats(data_train.data)
# Bare last expression: displays the whole stats structure as the cell output.
# NOTE(review): this dumps an entry for every rule; consider showing a summary or a slice.
stats.stats

{1: [{1: [25, 5], 2: [0, 36], 3: [0, 24], 'I': 1.2270811730854013e-17},
  {1: [30, 0], 2: [0, 36], 3: [0, 24], 'I': 1.48559057106513e-24},
  {1: [30, 0], 2: [0, 36], 3: [0, 24], 'I': 1.48559057106513e-24},
  {1: [30, 0], 2: [0, 36], 3: [0, 24], 'I': 1.48559057106513e-24},
  {1: [24, 6], 2: [0, 36], 3: [0, 24], 'I': 1.3497892903939414e-16},
  {1: [25, 5], 2: [0, 36], 3: [0, 24], 'I': 1.2270811730854013e-17},
  {1: [26, 4], 2: [0, 36], 3: [0, 24], 'I': 9.4390859468107796e-19},
  {1: [25, 5], 2: [0, 36], 3: [0, 24], 'I': 1.2270811730854013e-17},
  {1: [24, 6], 2: [0, 36], 3: [0, 24], 'I': 1.3497892903939414e-16},
  {1: [30, 0], 2: [0, 36], 3: [0, 24], 'I': 1.48559057106513e-24},
  {1: [25, 5], 2: [0, 36], 3: [0, 24], 'I': 1.2270811730854013e-17},
  {1: [26, 4], 2: [0, 36], 3: [0, 24], 'I': 9.4390859468107796e-19},
  {1: [30, 0], 2: [0, 36], 3: [0, 24], 'I': 1.48559057106513e-24},
  {1: [26, 4], 2: [0, 36], 3: [0, 24], 'I': 9.4390859468107796e-19},
  {1: [30, 0], 2: [0, 36], 3: [0, 24], 'I