In [109]:
%run CAR_creation.ipynb

In [110]:
import re
import numpy as np


def make_intervalfunc(minv, maxv, left_inclusivity, right_inclusivity):
    """Build a predicate that tests whether one value lies between two bounds.

    Parameters
    ----------
    minv, maxv
        Lower and upper bound of the interval.
    left_inclusivity, right_inclusivity
        Truthy when the respective bound itself counts as inside.

    Returns
    -------
    callable
        Function of a single value returning ``True`` or ``False``.
    """
    def inner_func(value):
        # Guard clauses: reject on the lower bound first, then on the upper
        # bound, so the upper check only runs when the lower one passed.
        if not greaterthan(value, minv, left_inclusivity):
            return False
        if not lesserthan(value, maxv, right_inclusivity):
            return False
        return True

    return inner_func
        
def greaterthan(a, b, inclusivity):
    """Return True when ``a`` lies above ``b``.

    Equality counts as a pass only when ``inclusivity`` is truthy. The result
    is always a plain ``bool``.
    """
    passes = (a >= b) if inclusivity else (a > b)
    return True if passes else False
        
def lesserthan(a, b, inclusivity):
    """Return True when ``a`` lies below ``b``.

    Equality counts as a pass only when ``inclusivity`` is truthy. The result
    is always a plain ``bool``.
    """
    passes = (a <= b) if inclusivity else (a < b)
    return True if passes else False


class Interval:
    """Numeric interval whose two endpoints are independently open or closed.

    Closed (inclusive) endpoints are rendered as ``<`` / ``>`` and open
    (exclusive) endpoints as ``(`` / ``)``, e.g. ``<1;2)``.

    Two intervals are equal when their bounds compare equal and their
    inclusivity flags match, so ``Interval(1, 2, True, True)`` equals
    ``Interval(1.0, 2.0, True, True)``. Hashing is consistent with equality.
    """

    def __init__(self, minval, maxval, left_inclusive, right_inclusive):
        """Store the bounds, the inclusivity flags and the display brackets.

        Parameters
        ----------
        minval, maxval
            Lower and upper bound.
        left_inclusive, right_inclusive
            Truthy when the respective bound belongs to the interval.
        """
        self.minval = minval
        self.maxval = maxval
        self.left_inclusive = left_inclusive
        self.right_inclusive = right_inclusive

        self.left_bracket = "<" if left_inclusive else "("
        self.right_bracket = ">" if right_inclusive else ")"

    def _key(self):
        # Single source of truth for equality and hashing.
        return (
            self.minval,
            self.maxval,
            bool(self.left_inclusive),
            bool(self.right_inclusive),
        )

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        # Compare by value rather than by hash: hashes can collide, and
        # unrelated objects must never compare equal to an interval.
        if not isinstance(other, Interval):
            return NotImplemented
        return self._key() == other._key()

    def refit(self, vals):
        """Shrink the interval to the values of ``vals`` that it contains.

        Parameters
        ----------
        vals : array-like
            Candidate values.

        Returns
        -------
        Interval
            Closed interval spanning the smallest and largest member.

        Raises
        ------
        ValueError
            If none of ``vals`` lies inside this interval.
        """
        values = np.asarray(vals)
        members = values[self.test_membership(values)]

        if members.size == 0:
            raise ValueError(
                "cannot refit {!r}: no value lies inside the interval".format(self)
            )

        return Interval(members.min(), members.max(), True, True)

    def test_membership(self, value):
        """Return a boolean array telling which entries of ``value`` are inside.

        ``value`` may be a scalar or any array-like, including an empty one.
        The result has the same shape as ``np.asarray(value)``.
        """
        values = np.asarray(value)

        # Whole-array comparisons replace the per-element Python callback,
        # which also makes empty input valid.
        if self.left_inclusive:
            above = values >= self.minval
        else:
            above = values > self.minval

        if self.right_inclusive:
            below = values <= self.maxval
        else:
            below = values < self.maxval

        return np.asarray(np.logical_and(above, below), dtype=bool)

    def string(self):
        """Return the compact text form, e.g. ``<1;2)``."""
        return "{}{};{}{}".format(self.left_bracket, self.minval, self.maxval, self.right_bracket)

    def __repr__(self):
        return "Interval[{}{};{}{}]".format(self.left_bracket, self.minval, self.maxval, self.right_bracket)

In [111]:
import re

class IntervalReader():
    """Configurable parser turning text such as ``<1.2;2.3)`` into an Interval.

    The notation is exposed through properties: the open and closed bracket
    pairs, the negative/positive infinity symbols, the decimal separator and
    the separator between the two bounds. Assigning any of them rebuilds the
    matching pattern, so the reader never parses with a stale regex.
    """

    # Pattern for the default notation only, kept as a public class
    # attribute. Instances match with the pattern built by compile_reader().
    interval_regex = re.compile(r"(<|\()(\d+(?:\.(?:\d)+)?);(\d+(?:\.(?:\d)+)?)(\)|>)")

    def __init__(self):
        # opened interval brackets (left, right)
        self.__open_bracket = "(", ")"

        # closed interval brackets (left, right)
        self.__closed_bracket = "<", ">"

        # negative and positive infinity symbol,
        # e.g. -inf, +inf
        self.__infinity_symbol = "-inf", "+inf"

        # decimal separator, e.g. ".", ","
        self.__decimal_separator = "."

        # interval members separator
        self.__members_separator = ";"

        self.compile_reader()

    def compile_reader(self):
        """Build the interval regex from the current notation settings.

        The pattern has four capture groups: left bracket, lower bound,
        upper bound, right bracket. A bound is an optionally negative decimal
        number, or the negative infinity symbol (lower bound only) or the
        positive infinity symbol (upper bound only).
        """
        # e.g. (   \(   |   <    )
        left_bracket_regex = "({}|{})".format(
            re.escape(self.open_bracket[0]),
            re.escape(self.closed_bracket[0]),
        )

        # e.g. (   \)   |   >    )
        right_bracket_regex = "({}|{})".format(
            re.escape(self.open_bracket[1]),
            re.escape(self.closed_bracket[1]),
        )

        # e.g. (   -?\d+  (?:  \.  \d+  )?   |   -inf   )
        number_template = r"(-?\d+(?:{}\d+)?|{})"
        decimal = re.escape(self.decimal_separator)

        left_number_regex = number_template.format(
            decimal, re.escape(self.infinity_symbol[0])
        )
        right_number_regex = number_template.format(
            decimal, re.escape(self.infinity_symbol[1])
        )

        interval_regex_string = "".join([
            left_bracket_regex,
            left_number_regex,
            re.escape(self.members_separator),
            right_number_regex,
            right_bracket_regex,
        ])

        self.__interval_regex = re.compile(interval_regex_string)

    def _parse_bound(self, text):
        """Convert one matched bound to ``float``, honouring the notation."""
        if text == self.infinity_symbol[0]:
            return float("-inf")
        if text == self.infinity_symbol[1]:
            return float("inf")

        # float() only understands ".", so normalise a custom separator.
        return float(text.replace(self.decimal_separator, "."))

    def read(self, interval_string):
        """Parse the first interval found in ``interval_string``.

        Returns
        -------
        Interval
            Bounds are floats; inclusivity follows the bracket type.

        Raises
        ------
        ValueError
            If ``interval_string`` contains no interval in the configured
            notation.
        """
        match = self.__interval_regex.search(interval_string)

        if match is None:
            raise ValueError(
                "cannot parse an interval from {!r}".format(interval_string)
            )

        left_bracket, minval, maxval, right_bracket = match.groups()

        left_inclusive = left_bracket == self.closed_bracket[0]
        right_inclusive = right_bracket == self.closed_bracket[1]

        return Interval(
            self._parse_bound(minval),
            self._parse_bound(maxval),
            left_inclusive,
            right_inclusive
        )

    # Notation properties. Every setter recompiles the pattern so that
    # matching and bracket interpretation stay in sync.

    @property
    def open_bracket(self):
        return self.__open_bracket

    @open_bracket.setter
    def open_bracket(self, val):
        self.__open_bracket = val
        self.compile_reader()

    @property
    def closed_bracket(self):
        return self.__closed_bracket

    @closed_bracket.setter
    def closed_bracket(self, val):
        self.__closed_bracket = val
        self.compile_reader()

    @property
    def infinity_symbol(self):
        return self.__infinity_symbol

    @infinity_symbol.setter
    def infinity_symbol(self, val):
        self.__infinity_symbol = val
        self.compile_reader()

    @property
    def decimal_separator(self):
        return self.__decimal_separator

    @decimal_separator.setter
    def decimal_separator(self, val):
        self.__decimal_separator = val
        self.compile_reader()

    @property
    def members_separator(self):
        return self.__members_separator

    @members_separator.setter
    def members_separator(self, val):
        self.__members_separator = val
        self.compile_reader()
    
    
        
# Smoke test: build a reader with the default notation and parse one interval.
interval_reader = IntervalReader()

# Rebuilds the regex from the current settings; __init__ has already done
# this once, so the call matters only after the notation was changed.
interval_reader.compile_reader()

# Last expression of the cell, so the parsed Interval is shown as its output.
interval_reader.read("<1.2;2.3>")

Interval[<1.2;2.3>]

In [112]:
%run ../../main.py

import copy

class QuantitativeCAR:
    """Class association rule whose antecedent values are parsed Interval objects.

    Built from an existing rule object that exposes ``antecedent`` (an
    iterable of ``(attribute, interval_string)`` pairs), ``consequent``,
    ``confidence``, ``support``, ``rulelen`` and ``rid``.
    """

    # Shared parser for the textual interval values of antecedent literals.
    interval_reader = IntervalReader()

    def __init__(self, rule):
        self.antecedent = self.__create_intervals_from_antecedent(rule.antecedent)
        # Shallow copy so rebinding attributes on this consequent does not
        # touch the source rule's consequent object.
        self.consequent = copy.copy(rule.consequent)

        self.confidence = rule.confidence
        self.support = rule.support
        self.rulelen = rule.rulelen
        self.rid = rule.rid

    def __create_intervals_from_antecedent(self, antecedent):
        """Parse every literal's value into an Interval; return the sorted list."""
        # Use the class-level reader rather than a notebook global, so the
        # class works regardless of which other cells have been executed.
        interval_antecedent = [
            (attribute, self.interval_reader.read(value))
            for attribute, value in antecedent
        ]

        return self.__sort_antecedent(interval_antecedent)

    def __sort_antecedent(self, antecedent):
        """Return the literals ordered by attribute name."""
        # Sort on the attribute only: intervals define no ordering, so a tie
        # on the attribute must not fall through to comparing them.
        return sorted(antecedent, key=lambda literal: literal[0])

    def copy(self):
        """Return a deep copy of this rule."""
        return copy.deepcopy(self)

    def __repr__(self):
        ant = self.antecedent
        ant_string_arr = [ key + "=" + val.string() for key, val in ant ]
        ant_string = "{" + ",".join(ant_string_arr) + "}"

        args = [
            ant_string,
            "{" + self.consequent.string() + "}",
            self.support,
            self.confidence,
            self.rulelen,
            self.rid
        ]

        text = "CAR {} => {} sup: {:.2f} conf: {:.2f} len: {}, id: {}".format(*args)

        return text
    
    
    


In [113]:
import pandas
import numpy as np

class QuantitativeDataFrame:
    """Wraps a pandas DataFrame and caches each column's sorted unique values."""

    def __init__(self, dataframe):
        """Validate ``dataframe`` and precompute the per-column value arrays.

        Raises
        ------
        Exception
            If ``dataframe`` is not a ``pandas.DataFrame`` (subclasses are
            accepted).
        """
        if not isinstance(dataframe, pandas.DataFrame):
            raise Exception("type of dataframe must be pandas.dataframe")

        self.__dataframe = dataframe

        # sorted and unique values of every column,
        # saved as numpy arrays keyed by column name
        self.__preprocessed_columns = self.__preprocess_columns(dataframe)

    def column(self, colname):
        """Return the sorted unique values of column ``colname`` as an array.

        Raises ``KeyError`` when the column does not exist.
        """
        return self.__preprocessed_columns[colname]

    def __preprocess_columns(self, dataframe):
        """Map each column name to a numpy array of its sorted unique values."""
        # column -> list of values
        dataframe_dict = dataframe.to_dict(orient="list")

        # np.unique already returns its result sorted, so no extra sort.
        return {
            column: np.unique(value_list)
            for column, value_list in dataframe_dict.items()
        }
        
        
    
    
    
# `movies_undiscr_txns` is not defined in the cells shown here -- presumably it
# comes from one of the %run scripts (TODO confirm).
qds = QuantitativeDataFrame(movies_undiscr_txns)

# NOTE(review): despite its name, `budget` holds the sorted unique values of
# the "a-list-celebrities" column at this point.
budget = qds.column("a-list-celebrities")

print(budget)
# Interval closed on the left and open on the right: 0 is in, 2 is out.
interval = Interval(0, 2, True, False)

# Boolean mask over `budget`; last expression, so it is the cell's output.
interval.test_membership(budget)

[1 2 3 4 5 6 7]


array([ True, False, False, False, False, False, False], dtype=bool)

In [125]:
import pandas

class RuleExtender:
    """Generates candidate extensions of quantitative rules from observed data.

    An extension widens the interval of one antecedent literal by a single
    step to the neighbouring distinct value of that attribute's column.

    NOTE(review): work in progress. ``transform`` currently prints every
    rule's direct extensions and yields ``None`` per rule; the beam/accept
    hooks at the end of the class are placeholders.
    """

    def __init__(self, dataframe):
        """Store the data the rules are extended against.

        Raises
        ------
        Exception
            If ``dataframe`` is not a ``QuantitativeDataFrame``.
        """
        if not isinstance(dataframe, QuantitativeDataFrame):
            raise Exception(
                "type of dataset must be QuantitativeDataFrame"
            )

        self.__dataframe = dataframe

    def transform(self, rules):
        """Run the extension step on a copy of every rule.

        Returns a list holding the result of the extension step for each
        rule, in input order. The input rules are not modified.
        """
        copied_rules = [ rule.copy() for rule in rules ]

        extended_rules = [ self.__extend(rule) for rule in copied_rules ]

        return extended_rules

    def __extend(self, rule):
        """Print ``rule`` followed by all of its direct extensions.

        TODO: select and return the best extension; for now this returns
        ``None``.
        """
        ext = self.__get_extensions(rule)
        print()
        print()
        print(rule)
        print()
        print("extensions:")
        for extended_rule in ext:
            print(extended_rule)

        print()

    def __extend_rule(self, rule):
        """Skeleton of the iterative extension loop (not implemented yet).

        TODO: repeatedly replace ``current_best`` with an accepted extension
        until none succeeds. As written the loop stops on its first pass.
        """
        current_best = rule

        while True:
            extension_succesful = False

            if not extension_succesful:
                break

    def __get_extensions(self, rule):
        """Return the set of rules differing from ``rule`` in one widened literal."""
        extended_rules = set()

        for position, literal in enumerate(rule.antecedent):
            neighborhood = self.__get_direct_extensions(literal)

            for extended_literal in neighborhood:
                # copy the rule so the extended literal
                # can replace the default literal
                copied_rule = rule.copy()

                # The copy keeps the antecedent order, so the literal sits at
                # the same position; no equality-based search is needed.
                copied_rule.antecedent[position] = extended_literal

                extended_rules.add(copied_rule)

        return extended_rules

    def __get_direct_extensions(self, literal):
        """Return the literals obtained by widening ``literal`` one step each way.

        Relies on the column values being sorted and unique. Returns up to
        two ``(attribute, Interval)`` pairs, left extension first, and an
        empty list when no column value lies inside the literal's interval.
        """
        attribute, interval = literal

        vals = self.__dataframe.column(attribute)

        mask = interval.test_membership(vals)

        # indices of the values covered by the interval
        member_indexes = np.where(mask)[0]

        if member_indexes.size == 0:
            # No covered value means no neighbouring value to step to.
            return []

        # one step beyond the covered run on either side
        left_index = member_indexes[0] - 1
        right_index = member_indexes[-1] + 1

        extensions = []

        # only extend where the neighbouring index exists
        if left_index >= 0:
            # NOTE(review): the left bound keeps its original inclusivity
            # while the right extension below is forced inclusive -- confirm
            # the asymmetry is intended.
            left_extended = Interval(
                vals[left_index],
                interval.maxval,
                interval.left_inclusive,
                interval.right_inclusive
            )

            extensions.append((attribute, left_extended))

        if right_index <= vals.size - 1:
            # The new right bound is made inclusive so that the neighbouring
            # value itself is covered by the widened interval.
            right_extended = Interval(
                interval.minval,
                vals[right_index],
                interval.left_inclusive,
                True
            )

            extensions.append((attribute, right_extended))

        return extensions

    def __get_beam_extensions(self, rule):
        """Placeholder -- not implemented."""
        pass

    def __upper_extension(self, literal, vals):
        """Placeholder -- not implemented."""
        pass

    def __crisp_accept(self, rule):
        """Placeholder -- not implemented."""
        pass

    def __conditional_accept(self, rule):
        """Placeholder -- not implemented."""
        pass

        
        
        
# Demo run of the extender on the wrapped dataset from the earlier cell.
rule_ext = RuleExtender(qds)

# `rules` is not defined in the cells shown here -- presumably it comes from
# one of the %run scripts (TODO confirm).
qrules = [ QuantitativeCAR(r) for r in rules ]

# Prints each rule with its extensions; the displayed value is a list of None
# because the per-rule extension step does not return anything yet.
rule_ext.transform(qrules)



CAR {a-list-celebrities=<0.0;2.0)} => {class=box-office-bomb} sup: 0.31 conf: 1.00 len: 2, id: 136

extensions:
CAR {a-list-celebrities=<0.0;2)} => {class=box-office-bomb} sup: 0.31 conf: 1.00 len: 2, id: 136



CAR {estimated-budget=<250.0;300.0)} => {class=main-stream-hit} sup: 0.06 conf: 1.00 len: 2, id: 98

extensions:
CAR {estimated-budget=<223;300.0)} => {class=main-stream-hit} sup: 0.06 conf: 1.00 len: 2, id: 98



CAR {a-list-celebrities=<4.0;6.0),estimated-budget=<0.0;50.0)} => {class=critical-success} sup: 0.06 conf: 1.00 len: 3, id: 118

extensions:
CAR {a-list-celebrities=<4.0;6.0),estimated-budget=<0.0;54)} => {class=critical-success} sup: 0.06 conf: 1.00 len: 3, id: 118
CAR {a-list-celebrities=<3;6.0),estimated-budget=<0.0;50.0)} => {class=critical-success} sup: 0.06 conf: 1.00 len: 3, id: 118
CAR {a-list-celebrities=<4.0;6),estimated-budget=<0.0;50.0)} => {class=critical-success} sup: 0.06 conf: 1.00 len: 3, id: 118



CAR {a-list-celebrities=<6.0;8.0)} => {class=criti

[None, None, None, None, None, None, None, None, None]

In [19]:
# NOTE(review): this cell's execution count is lower than that of the cells
# defining QuantitativeCAR above, so it was last run against an earlier
# kernel state -- re-run after Restart & Run All.
r0 = rules[1]
q_car0 = QuantitativeCAR(r0)

# Not displayed: only the last expression of a cell is echoed.
q_car0

# Deep copy, so the mutation below leaves q_car0 itself untouched.
q_car0_copy = copy.deepcopy(q_car0)

q_car0_copy.consequent.value = "bust"

In [23]:
import numpy as np

def get_extensions(literal, vals):
    """
    ensure sort and unique
    before calling functions
    """
    
    if type(vals) != np.ndarray:
        raise Exception("type of vals must be numpy.ndarray")
    
    vals_len = vals.size
    
    attribute, interval = literal
    
    # from [[3], [2], [5], [4]]
    # to [2, 3, 4, 5]
    sorted_vals = np.sort(vals).reshape(vals_len)

    # if there were not unique values
    # e.g. [2, 2, 3, 4, 5, 5]
    # [2, 5] would be still chosen
    unique_vals = np.unique(sorted_vals)
    
    mask = interval.test_membership(unique_vals)
    
    # indices of interval members
    # we want to extend them 
    # once to the left
    # and once to the right
    # bu we have to check if resulting
    # indices are not larger than value size
    member_indexes = np.where(mask)[0]
    
    first_index = member_indexes[0]
    last_index = member_indexes[-1]
    
    first_index_modified = first_index - 1
    last_index_modified = last_index + 1
    
    no_left_extension = False
    no_right_extension = False
    
    if first_index_modified < 0:
        no_left_extension = True
        
    # if last_index_modified is larger than
    # available indices
    if last_index_modified > unique_vals.size - 1:
        no_right_extension = True
    
    
    new_left_bound = interval.minval
    new_right_bound = interval.maxval
    
    if not no_left_extension:
        new_left_bound = unique_vals[first_index_modified]
        
    if not no_right_extension:
        new_right_bound = unique_vals[last_index_modified]
        
        
    # prepare return values
    extensions = []
    
    if not no_left_extension:
        extension = new_left_bound, interval.maxval
        
        temp_interval = Interval(
            new_left_bound,
            interval.maxval,
            interval.left_inclusive,
            interval.right_inclusive
        )
        
        extensions.append((attribute, temp_interval))
    
    if not no_right_extension:
        extensoin = interval.minval, new_right_bound
        
        temp_interval = Interval(
            interval.minval,
            new_right_bound,
            interval.left_inclusive,
            interval.right_inclusive
        )
        
        extensions.append((attribute, temp_interval))
    
    return extensions


# Selecting with a list of column names is presumably meant to yield a
# one-column DataFrame, so np.array(budget) below would be two-dimensional
# -- TODO confirm the type of movies_undiscr_txns.
budget = movies_undiscr_txns[['estimated-budget']]
# First literal (attribute, interval) of the rule built in the previous cell.
lit0 = q_car0.antecedent[0]


# Last expression of the cell, so the list of extensions is shown as its output.
get_extensions(lit0, np.array(budget))

[('estimated-budget', Interval[<223;300.0)])]

In [107]:
# Scratch check of Interval hashing with two separately built, identical intervals.
arr = [("A", Interval(1, 2, True, True))]

# Disabled lookup of an equal literal in the list.
#arr.index(("A", Interval(1, 2, True, True)))

# Last expression of the cell: whether both instances hash to the same value.
hash(Interval(1, 2, True, True)) == hash(Interval(1, 2, True, True))

True