In [4]:
# Execute the CAR_creation notebook from within this session.
# NOTE(review): presumably this is where `rules` and `movies_train_undiscr`,
# used by later cells, come from — TODO confirm against that notebook.
%run CAR_creation.ipynb

In [1]:
import re
import numpy as np


def make_intervalfunc(minv, maxv, left_inclusivity, right_inclusivity):
    """Build a predicate that tells whether a value lies between two bounds.

    Parameters
    ----------
    minv, maxv
        Lower and upper bound captured by the returned predicate.
    left_inclusivity, right_inclusivity
        Truthy when the corresponding bound itself counts as inside.

    Returns
    -------
    callable
        ``inner_func(value) -> bool``; True only when ``value`` passes both
        the lower-bound and the upper-bound check.
    """
    def inner_func(value):
        # Lower bound first; the upper bound is only examined when it passes.
        if not greaterthan(value, minv, left_inclusivity):
            return False
        if not lesserthan(value, maxv, right_inclusivity):
            return False
        return True

    return inner_func
        
def greaterthan(a, b, inclusivity):
    """Tell whether ``a`` is above ``b``.

    With a truthy ``inclusivity`` equality also counts (``a >= b``);
    otherwise the comparison is strict (``a > b``). Always returns a plain
    ``bool``.
    """
    if not inclusivity:
        return bool(a > b)
    return bool(a >= b)
        
def lesserthan(a, b, inclusivity):
    """Tell whether ``a`` is below ``b``.

    With a truthy ``inclusivity`` equality also counts (``a <= b``);
    otherwise the comparison is strict (``a < b``). Always returns a plain
    ``bool``.
    """
    if not inclusivity:
        return bool(a < b)
    return bool(a <= b)


class Interval:
    """Interval between two bounds, each of which may be open or closed.

    Closed ends are rendered with ``<`` / ``>`` and open ends with ``(`` / ``)``,
    e.g. ``<1;5)`` contains 1 but not 5.

    Parameters
    ----------
    minval, maxval
        Lower and upper bound.
    left_inclusive, right_inclusive
        Truthy when the corresponding bound belongs to the interval.
    """

    def __init__(self, minval, maxval, left_inclusive, right_inclusive):
        self.minval = minval
        self.maxval = maxval
        self.left_inclusive = left_inclusive
        self.right_inclusive = right_inclusive

        self.left_bracket = "<" if left_inclusive else "("
        self.right_bracket = ">" if right_inclusive else ")"

        # otypes is given explicitly: without it np.vectorize has to call the
        # function once to discover the output dtype and therefore raises on
        # size-0 input.
        self.__membership_func = np.vectorize(
            make_intervalfunc(self.minval, self.maxval, self.left_inclusive, self.right_inclusive),
            otypes=[bool],
        )

    def __hash__(self):
        """Hash derived from the textual representation."""
        return hash(repr(self))

    def __eq__(self, other):
        """Two intervals are equal when their representations match.

        Defined through ``repr`` so that it always agrees with ``__hash__``;
        without it, value-identical intervals would hash alike yet never
        compare equal, so sets and dicts could not de-duplicate them.
        """
        if not isinstance(other, Interval):
            return NotImplemented
        return repr(self) == repr(other)

    def refit(self, vals):
        """Shrink the interval to the outermost values of ``vals`` that lie inside it.

        Parameters
        ----------
        vals : array-like
            Candidate values (the "grid").

        Returns
        -------
        Interval
            New interval, closed on both sides, spanning the smallest and the
            largest member found.

        Raises
        ------
        ValueError
            When no element of ``vals`` falls inside this interval.
        """
        values = np.asarray(vals)
        members = values[self.test_membership(values)]

        if members.size == 0:
            raise ValueError(
                "cannot refit {}: no value lies inside the interval".format(self.string())
            )

        return Interval(members.min(), members.max(), True, True)

    def test_membership(self, value):
        """Return a boolean array (same shape as ``value``) marking members."""
        return self.__membership_func(value)

    def string(self):
        """Compact textual form, e.g. ``<1;5)``."""
        return "{}{};{}{}".format(self.left_bracket, self.minval, self.maxval, self.right_bracket)

    def __repr__(self):
        return "Interval[{}{};{}{}]".format(self.left_bracket, self.minval, self.maxval, self.right_bracket)

In [3]:
import re

class IntervalReader():
    """Configurable parser turning text such as ``<1.2;2.3>`` into an ``Interval``.

    The notation (brackets, infinity symbols, decimal separator and members
    separator) is adjustable through the properties below. The matching
    pattern is built by ``compile_reader``; ``read`` builds it on demand when
    it has not been compiled yet or when a setting changed since.

    NOTE: finite bounds are matched as unsigned numbers (``\\d+`` with an
    optional fractional part); the only negative bound recognised is the
    negative-infinity symbol.
    """

    # Pattern for the default notation. ``read`` does not consult it; it uses
    # the pattern produced by ``compile_reader`` instead.
    interval_regex = re.compile(r"(<|\()(\d+(?:\.(?:\d)+)?);(\d+(?:\.(?:\d)+)?)(\)|>)")

    def __init__(self):
        # opened interval brackets (left, right)
        self.__open_bracket = "(", ")"

        # closed interval brackets (left, right)
        self.__closed_bracket = "<", ">"

        # negative and positive infinity symbol,
        # e.g. -inf, +inf
        self.__infinity_symbol = "-inf", "+inf"

        # decimal separator, e.g. ".", ","
        self.__decimal_separator = "."

        # interval members separator
        self.__members_separator = ";"

        # compiled pattern; None means "not compiled yet / settings changed"
        self.__interval_regex = None

    def compile_reader(self):
        """Build and store the regular expression for the current notation.

        The pattern has four capture groups: left bracket, lower bound,
        upper bound, right bracket. Every configurable symbol is escaped, so
        regex metacharacters may be used as brackets or separators.
        """
        # e.g. ( \( | < )
        left_bracket_regex_string = "({}|{})".format(
            re.escape(self.open_bracket[0]),
            re.escape(self.closed_bracket[0]),
        )

        # e.g. ( \) | > )
        right_bracket_regex_string = "({}|{})".format(
            re.escape(self.open_bracket[1]),
            re.escape(self.closed_bracket[1]),
        )

        # digits, optionally followed by <separator>digits, or an infinity symbol
        number_template = r"(\d+(?:{}(?:\d)+)?|{})"
        decimal_separator_regex = re.escape(self.decimal_separator)

        # the lower bound may be negative infinity ...
        left_number_regex_string = number_template.format(
            decimal_separator_regex,
            re.escape(self.infinity_symbol[0]),
        )

        # ... and the upper bound positive infinity
        right_number_regex_string = number_template.format(
            decimal_separator_regex,
            re.escape(self.infinity_symbol[1]),
        )

        interval_regex_string = "".join([
            left_bracket_regex_string,
            left_number_regex_string,
            re.escape(self.members_separator),
            right_number_regex_string,
            right_bracket_regex_string,
        ])

        self.__interval_regex = re.compile(interval_regex_string)

    def read(self, interval_string):
        """Parse the first interval found in ``interval_string``.

        Parameters
        ----------
        interval_string : str
            Text containing an interval in the configured notation.

        Returns
        -------
        Interval
            Interval with ``float`` bounds (infinity symbols become
            ``float("-inf")`` / ``float("inf")``) and inclusivity flags taken
            from the bracket types.

        Raises
        ------
        IndexError
            When the text contains no interval. The exception type matches
            what indexing an empty ``findall`` result used to raise.
        """
        if self.__interval_regex is None:
            self.compile_reader()

        match = self.__interval_regex.search(interval_string)
        if match is None:
            raise IndexError(
                "no interval found in {!r}".format(interval_string)
            )

        left_bracket, minval, maxval, right_bracket = match.groups()

        left_inclusive = left_bracket == self.closed_bracket[0]
        right_inclusive = right_bracket == self.closed_bracket[1]

        # The captured bounds are text; Interval compares its bounds with
        # numbers, so convert them before constructing it.
        return Interval(
            self.__parse_bound(minval),
            self.__parse_bound(maxval),
            left_inclusive,
            right_inclusive,
        )

    def __parse_bound(self, text):
        """Convert one captured bound to ``float`` using the configured symbols."""
        if text == self.infinity_symbol[0]:
            return float("-inf")
        if text == self.infinity_symbol[1]:
            return float("inf")
        return float(text.replace(self.decimal_separator, "."))

    # Getters/setters for the notation. Each setter drops the compiled
    # pattern so that the next ``read`` uses the new symbols.

    @property
    def open_bracket(self):
        return self.__open_bracket

    @open_bracket.setter
    def open_bracket(self, val):
        self.__open_bracket = val
        self.__interval_regex = None

    @property
    def closed_bracket(self):
        return self.__closed_bracket

    @closed_bracket.setter
    def closed_bracket(self, val):
        self.__closed_bracket = val
        self.__interval_regex = None

    @property
    def infinity_symbol(self):
        return self.__infinity_symbol

    @infinity_symbol.setter
    def infinity_symbol(self, val):
        self.__infinity_symbol = val
        self.__interval_regex = None

    @property
    def decimal_separator(self):
        return self.__decimal_separator

    @decimal_separator.setter
    def decimal_separator(self, val):
        self.__decimal_separator = val
        self.__interval_regex = None

    @property
    def members_separator(self):
        return self.__members_separator

    @members_separator.setter
    def members_separator(self, val):
        self.__members_separator = val
        self.__interval_regex = None
    
    
        
# Quick check of the reader: create one with the default notation, compile its
# pattern, and parse a single closed interval. The final expression is the
# value the notebook displays for this cell.
interval_reader = IntervalReader()

interval_reader.compile_reader()

interval_reader.read("<1.2;2.3>")

Interval[<1.2;2.3>]

In [44]:
# Execute the script main.py located two directories above this notebook.
# NOTE(review): what the script defines is not visible from here — confirm
# which names later cells rely on.
%run ../../main.py

from pyarc.qcba.interval import Interval
import copy

class QuantitativeCAR:
    """Rule whose antecedent values are wrapped in ``Interval`` objects.

    Built from an existing rule object: the antecedent is converted to a
    sorted list of ``(attribute, Interval)`` pairs, the consequent is
    shallow-copied, and ``confidence``, ``support``, ``rulelen`` and ``rid``
    are taken over as they are.
    """

    def __init__(self, rule):
        self.antecedent = self.__create_intervals_from_antecedent(rule.antecedent)
        self.consequent = copy.copy(rule.consequent)

        # plain measures and identifiers are carried over unchanged
        self.confidence = rule.confidence
        self.support = rule.support
        self.rulelen = rule.rulelen
        self.rid = rule.rid

    def __create_intervals_from_antecedent(self, antecedent):
        """Turn ``(attribute, value)`` literals into sorted ``(attribute, Interval)`` pairs."""
        pairs = [
            (attribute, Interval(value))
            for attribute, value in antecedent
        ]
        return self.__sort_antecedent(pairs)

    def __sort_antecedent(self, antecedent):
        """Return a new list with the literals in ascending order."""
        ordered = list(antecedent)
        ordered.sort()
        return ordered

    def copy(self):
        """Return a deep copy of this rule."""
        return copy.deepcopy(self)

    def __repr__(self):
        # antecedent rendered as {attr=interval,attr=interval,...}
        literal_texts = (
            attribute + "=" + interval.string()
            for attribute, interval in self.antecedent
        )
        antecedent_text = "{" + ",".join(literal_texts) + "}"
        consequent_text = "{" + self.consequent.string() + "}"

        return "CAR {} => {} sup: {:.2f} conf: {:.2f} len: {}, id: {}".format(
            antecedent_text,
            consequent_text,
            self.support,
            self.confidence,
            self.rulelen,
            self.rid,
        )
    
    
    


In [29]:
import pandas

class RuleExtender:
    """Skeleton of a rule extender operating on a pandas DataFrame.

    Only the constructor does real work at the moment; ``transform`` and the
    private helpers are placeholders that return ``None``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Data the extender works with. DataFrame subclasses are accepted.

    Raises
    ------
    TypeError
        When ``dataset`` is not a ``pandas.DataFrame``.
    """

    def __init__(self, dataset):

        # isinstance (rather than an exact type comparison) so that DataFrame
        # subclasses are not rejected; TypeError is still an Exception, so
        # existing ``except Exception`` handlers keep working.
        if not isinstance(dataset, pandas.DataFrame):
            raise TypeError(
                "type of dataset must be pandas.DataFrame"
            )

        self.__dataset = dataset

    def transform(self, rule):
        """Placeholder — not implemented yet; returns None."""
        pass

    def __get_extensions(self, literal, vals):
        """Unfinished helper: unpacks the literal and de-duplicates ``vals``.

        Nothing is returned yet.
        """
        attribute, interval = literal

        # if there were not unique values
        # e.g. [2, 2, 3, 4, 5, 5]
        # [2, 5] would be still chosen
        unique_vals = set(vals)

    def __get_beam_extensions(self, rule):
        """Placeholder — not implemented yet; returns None."""
        pass

    def __upper_extension(self, literal, vals):
        """Placeholder — not implemented yet; returns None."""
        pass

    def __crisp_accept(self, rule):
        """Placeholder — not implemented yet; returns None."""
        pass

    def __conditional_accept(self, rule):
        """Placeholder — not implemented yet; returns None."""
        pass

        
        

In [50]:
# Take the element at index 1 of `rules` (defined outside this cell) and wrap
# it as a QuantitativeCAR.
r0 = rules[1]
q_car0 = QuantitativeCAR(r0)

# Bare expression in the middle of a cell: it is evaluated, but a notebook only
# echoes a cell's last expression.
q_car0

# Work on a deep copy so the assignment below is made on the copy's consequent.
q_car0_copy = copy.deepcopy(q_car0)

# NOTE(review): presumably checks that q_car0.consequent stays untouched —
# confirm by inspecting q_car0 afterwards.
q_car0_copy.consequent.value = "bust"

In [74]:
import numpy as np

def get_extensions(literal, vals):
    """
    ensure sort and unique
    before calling functions
    """
    
    if type(vals) != np.ndarray:
        raise Exception("type of vals must be numpy.ndarray")
    
    vals_len = vals.size
    
    attribute, interval = literal
    
    # from [[3], [2], [5], [4]]
    # to [2, 3, 4, 5]
    sorted_vals = np.sort(vals).reshape(vals_len)

    # if there were not unique values
    # e.g. [2, 2, 3, 4, 5, 5]
    # [2, 5] would be still chosen
    unique_vals = np.unique(sorted_vals)
    
    mask = interval.test_membership(unique_vals)
    
    indexes = np.where(mask)
    
    print(unique_vals)
    
    return indexes


# Select the 'estimated-budget' column from `movies_train_undiscr`, which is
# defined outside this cell (list-style selection with double brackets).
budget = movies_train_undiscr[['estimated-budget']]
# First (attribute, interval) pair of the rule's antecedent.
lit0 = q_car0.antecedent[0]


# get_extensions insists on a numpy.ndarray, hence the explicit np.array(...).
get_extensions(lit0, np.array(budget))

[ 10  12  21  23  32  35  43  45  54  55  58  62  65  66  74  78  80  85
 106 110 132 143 152 154 160 180 186 202 209 223 260 264]


(array([30, 31], dtype=int64),)