In [1]:
import itertools

import numpy as np
import pandas as pd

from context import predicate_search
from predicate_search import Model, Linear, Transform, Feature, Predicate

In [2]:
# Load the raw sensor readings, then convert the `dtime` column to datetimes.
data = pd.read_csv('sensor_data.csv')
data['dtime'] = pd.to_datetime(data['dtime'])

In [3]:
# Display the loaded frame; the rendered output is truncated to its first and last rows.
data

Unnamed: 0,moteid,temperature,humidity,light,voltage,dtime
0,1.0,122.1530,-3.91901,11.04,2.03397,2004-03-31 03:38:15.757551
1,1.0,19.9884,37.09330,45.08,2.69964,2004-02-28 00:59:16.027850
2,1.0,19.3024,38.46290,45.08,2.68742,2004-02-28 01:03:16.333930
3,1.0,19.1652,38.80390,45.08,2.68742,2004-02-28 01:06:16.013453
4,1.0,19.1750,38.83790,45.08,2.69964,2004-02-28 01:06:46.778088
...,...,...,...,...,...,...
2219798,58.0,24.1730,21.65750,1729.60,2.78836,2004-04-02 09:06:21.488182
2219799,58.0,24.1436,21.73100,1729.60,2.78836,2004-04-02 09:07:22.173243
2219800,58.0,24.1436,21.65750,1670.72,2.78836,2004-04-02 09:07:52.313870
2219801,58.0,24.1240,21.58390,1670.72,2.78836,2004-04-02 09:08:52.189274


In [4]:
# Column to be modelled, and the (currently empty) list of predictor columns.
target = 'temperature'
predictors = []

In [5]:
# Build a Model for `target` on `data` with the predictor list defined above and a
# Linear spec (noise='robust', nu=5), fit it, and keep the result of m.score().
# NOTE(review): the meaning of `noise`/`nu` and the shape of `score` are defined
# by the predicate_search package -- confirm there.
m = Model(data, predictors, target, Linear(noise='robust', nu=5))
m.fit()
score = m.score()

logp = -1.0811e+07, ||grad|| = 3,683.2: 100%|██████████| 26/26 [00:01<00:00, 14.92it/s]     


In [6]:
# Build a Transform over every column except the target and keep its `disc_df`
# attribute (presumably the discretized frame -- confirm in predicate_search).
transform = Transform(data.drop(target, axis=1))
disc_data = transform.disc_df

In [7]:
class PredicateSearch:
    """Iterative search for the lowest-scoring predicates over a data frame.

    The search starts from one single-value predicate per (feature, value)
    pair, repeatedly merges adjacent predicates when that does not worsen the
    score by more than ``eps``, and then combines the surviving non-adjacent
    predicates pairwise for the next round.

    Predicate objects are expected to expose ``score``, ``best_score``,
    ``raw_score``, ``feature_names``, ``set_score(c)``, ``is_adjacent(other)``
    and ``merge(other)``; their exact semantics are defined by the
    ``predicate_search`` package, not by this class.
    """

    # Absolute tolerance used for every score comparison in this class.
    TOL = 10 ** -5

    def __init__(self, df, dtypes, scores, target=None):
        """Store the inputs and build the base (single-value) predicates.

        Args:
            df: frame whose columns are the features to search over.
                NOTE(review): presumably the discretized frame
                (``Transform.disc_df``) -- confirm against the caller.
            dtypes: mapping from column name to a type label; a column is
                treated as continuous when its label equals ``'continuous'``.
            scores: passed through unchanged to every ``Predicate``.
            target: optional column name to exclude from the features.
                ``None`` (the default) keeps every column of ``df``.
        """
        self.df = df
        self.dtypes = dtypes
        self.scores = scores
        self.target = target
        self.base_predicates = self.generate_base_predicates(scores)

    def generate_base_predicates(self, scores):
        """Return one single-feature, single-value predicate per distinct value.

        Columns are visited in frame order and, within a column, values in
        sorted order. The target column (if set) is skipped.
        """
        predicates = []
        for feature in self.df.columns:
            if feature == self.target:
                continue
            column = self.df[feature]
            is_continuous = self.dtypes[feature] == 'continuous'
            for value in sorted(column.unique()):
                # Index labels of the rows where this feature takes this value.
                index = column[column == value].index.tolist()
                predicates.append(
                    Predicate([Feature(feature, [value], index, is_continuous)], scores))
        return predicates

    def set_scores(self, predicates, c):
        """Call ``set_score(c)`` on every predicate."""
        for p in predicates:
            p.set_score(c)

    def merge_adjacent(self, predicates, idx, c, eps):
        """Greedily absorb later adjacent predicates into ``predicates[idx]``.

        The list is modified in place: each accepted merge replaces
        ``predicates[idx]`` with the merged predicate and deletes the absorbed
        one. A merge is accepted when the merged score does not exceed the
        current score by more than ``eps``.
        """
        predicate = predicates[idx]
        pos = idx + 1
        while pos < len(predicates):
            candidate = predicates[pos]
            if predicate.is_adjacent(candidate):
                merged = predicate.merge(candidate)
                merged.set_score(c)
                if (merged.score - eps) <= predicate.score:
                    predicates[idx] = merged
                    predicate = merged
                    # The next candidate slides into `pos`, so do not advance.
                    del predicates[pos]
                    continue
            pos += 1

    def merge_all_adjacent(self, predicates, c, eps):
        """Run ``merge_adjacent`` from every position; returns the same list."""
        idx = 0
        # len() is re-evaluated each pass because merging shrinks the list.
        while idx < len(predicates):
            self.merge_adjacent(predicates, idx, c, eps)
            idx += 1
        return predicates

    def merge_all_all_adjacent(self, predicates, c, eps, maxiters=100):
        """Repeat ``merge_all_adjacent`` until the list stops shrinking.

        Stops after at most ``maxiters`` passes and always returns the list of
        predicates, including when the pass budget is exhausted before
        convergence.
        """
        for _ in range(maxiters):
            n = len(predicates)
            predicates = self.merge_all_adjacent(predicates, c, eps)
            if len(predicates) == n:
                break
        return predicates

    def prune(self, predicates, best_score):
        """Keep predicates whose ``best_score`` is below ``best_score`` (within TOL)."""
        return [p for p in predicates if p.best_score - self.TOL < best_score]

    def intersect(self, predicates):
        """Merge every non-adjacent pair and drop results with infinite ``raw_score``."""
        new_predicates = [p1.merge(p2)
                          for p1, p2 in itertools.combinations(predicates, 2)
                          if not p1.is_adjacent(p2)]
        return [p for p in new_predicates if p.raw_score < np.inf]

    def search(self, c=1., eps=.01, maxiters=10):
        """Run the search and return the list of best predicates found.

        Args:
            c: forwarded to ``Predicate.set_score``.
            eps: slack allowed when accepting a merge of adjacent predicates.
            maxiters: maximum number of merge/combine rounds.

        Returns:
            The predicates tied (within TOL) for the lowest score seen; an
            empty list when no candidate was ever available.
        """
        best_score = np.inf
        best_p = []
        predicates = self.base_predicates.copy()

        for _ in range(maxiters):
            self.set_scores(predicates, c)
            used_features = list(set([a for b in [p.feature_names for p in best_p] for a in b]))
            # Keep strict improvements, plus ties that bring in only features
            # not already covered by the current best predicates.
            predicates = [
                p for p in predicates
                if (p.score < best_score - self.TOL) or
                (p.score - self.TOL < best_score
                 and not any(name in p.feature_names for name in used_features))
            ]
            if len(predicates) == 0:
                return best_p

            sorted_predicates = sorted(predicates, key=lambda x: x.score)
            merged_predicates = self.merge_all_all_adjacent(sorted_predicates, c, eps)

            new_best_p = min(merged_predicates, key=lambda x: x.score)
            new_best_score = new_best_p.score
            if new_best_score < best_score - self.TOL:
                best_score = new_best_score
                best_p = [new_best_p]
            elif new_best_score - self.TOL < best_score:
                best_p.append(new_best_p)
            else:
                return best_p

            best_single_score = min([p.best_score for p in best_p])
            pruned_predicates = self.prune(merged_predicates, best_single_score)
            predicates = self.intersect(pruned_predicates)
        return best_p