In [1]:
# Labelled training samples. Each row is [humidity, temperature, room];
# the column names are listed in `header`, and the last element is the
# class label the tree learns to predict.
training_data = [
    [16, -2, 'Freezer'],
    [36, 25, 'Livingroom'],
    [36, 25, 'Kitchen'],
    [65, 42, 'Greenhouse'],
    [23, -5, 'Freezer'],
    [32, 25, 'Livingroom'],
    [37, 30, 'Kitchen'],
    [85, 44, 'Greenhouse'],
    [15, -6, 'Freezer'],
    [24, 27, 'Livingroom'],
    [55, 30, 'Kitchen'],
    [59, 36, 'Greenhouse'],
    [14, -5, 'Freezer'],
    [36, 26, 'Livingroom'],
    [58, 23, 'Kitchen'],
    [91, 46, 'Greenhouse'],
    [8, -6, 'Freezer'],
    [37, 19, 'Livingroom'],
    [44, 23, 'Kitchen'],
    [86, 47, 'Greenhouse'],
    [19, -1, 'Freezer'],
    [54, 20, 'Livingroom'],
    [48, 32, 'Kitchen'],
    [56, 28, 'Greenhouse'],
    [20, -9, 'Freezer'],
    [24, 24, 'Livingroom'],
    [65, 30, 'Kitchen'],
    [55, 47, 'Greenhouse'], 
    [1, -1, 'Freezer'],
    [53, 22, 'Livingroom'],
    [37, 27, 'Kitchen'],
    [62, 26, 'Greenhouse'],
    [19, -1, 'Freezer'],
    [21, 28, 'Livingroom'],
    [38, 31, 'Kitchen'],
    [80, 22, 'Greenhouse'],
    [5, -4, 'Freezer'],
    [28, 19, 'Livingroom'],
    [39, 29, 'Kitchen'],
    [100, 38, 'Greenhouse'],
    [3, -8, 'Freezer'],
    [41, 23, 'Livingroom'],
    [57, 31, 'Kitchen'],
    [77, 24, 'Greenhouse'],
    [18, -1, 'Freezer'],
    [38, 25, 'Livingroom'],
    [74, 28, 'Kitchen'],
    [64, 45, 'Greenhouse'],
    [10, -6, 'Freezer'],
    [44, 18, 'Livingroom'],
    [67, 25, 'Kitchen'],
    [68, 20, 'Greenhouse'],
    [8, 0, 'Freezer'],
    [50, 26, 'Livingroom'],
    [64, 27, 'Kitchen'],
    [60, 24, 'Greenhouse'],
    [2, -3, 'Freezer'],
    [30, 26, 'Livingroom'],
    [59, 31, 'Kitchen'],
    [64, 20, 'Greenhouse'],
    [9, -4, 'Freezer'],
    [21, 22, 'Livingroom'],
    [71, 26, 'Kitchen'],
    [75, 39, 'Greenhouse'],
    [24, -2, 'Freezer'],
    [40, 24, 'Livingroom'],
    [41, 24, 'Kitchen'],
    [77, 40, 'Greenhouse'],
    [8, -3, 'Freezer'],
    [38, 28, 'Livingroom'],
    [62, 26, 'Kitchen'],
    [68, 34, 'Greenhouse'],
    [24, -6, 'Freezer'],
    [50, 27, 'Livingroom'],
    [54, 24, 'Kitchen'],
    [81, 34, 'Greenhouse'],
    [17, -3, 'Freezer'],
    [47, 22, 'Livingroom'],
    [43, 24, 'Kitchen'],
    [66, 21, 'Greenhouse'],
    [1, -1, 'Freezer'],
    [58, 21, 'Livingroom'],
    [72, 28, 'Kitchen'],
    [62, 24, 'Greenhouse'],
    [3, -6, 'Freezer'],
    [33, 18, 'Livingroom'],
    [42, 30, 'Kitchen'],
    [67, 37, 'Greenhouse'],
    [6, -2, 'Freezer'],
    [41, 20, 'Livingroom'],
    [50, 32, 'Kitchen'],
    [73, 21, 'Greenhouse'],
    [15, -10, 'Freezer'],
    [32, 20, 'Livingroom'],
    [37, 31, 'Kitchen'],
    [71, 33, 'Greenhouse'],
    [1, 0, 'Freezer'],
    [45, 20, 'Livingroom'],
    [43, 32, 'Kitchen'],
    [67, 36, 'Greenhouse']
]

In [2]:
# Column names for a data row, in positional order. Question.__repr__ looks
# up header[column] to print a readable question.
header = ["humidity","temperature","room"]

In [3]:
def unique_vals(rows, col):
    """Return the set of distinct values found in column `col` of `rows`."""
    return {record[col] for record in rows}

In [4]:
def class_counts(rows):
    """Count how often each label occurs in `rows`.

    The label of a row is its last element. Returns a dict mapping each
    label to its number of occurrences, keyed in order of first appearance.
    """
    tally = {}
    for record in rows:
        tag = record[-1]
        tally[tag] = tally.get(tag, 0) + 1
    return tally

In [5]:
def is_numeric(value):
    """Return True when `value` is an instance of int or float."""
    return isinstance(value, (int, float))

In [6]:
class Question:
    """A test applied to one column of a row, used to split a dataset.

    Stores a column index and a reference value. For numeric cells every
    test is ``cell >= value``; the methods differ only in how they treat
    non-numeric cells.
    """

    def __init__(self, column, value):
        self.column = column
        self.value = value

    def match(self, example):
        """Numeric cell: ``cell >= value``; otherwise ``cell == value``."""
        cell = example[self.column]
        return cell >= self.value if is_numeric(cell) else cell == self.value

    def moreless(self, example):
        """Numeric cell: ``cell >= value``; otherwise ``cell <= value``."""
        cell = example[self.column]
        return cell >= self.value if is_numeric(cell) else cell <= self.value

    def __repr__(self):
        # Readable rendering of the question. The operator shown depends on
        # whether the stored reference value (not a row cell) is numeric.
        operator = ">=" if is_numeric(self.value) else "=="
        return "Is %s %s %s?" % (
            header[self.column], operator, str(self.value))

In [7]:
def partition(rows, question):
    """Split `rows` into those that satisfy `question.match` and the rest.

    Returns a (true_rows, false_rows) tuple of lists; each list keeps the
    rows in their original relative order.
    """
    matched, unmatched = [], []
    for record in rows:
        bucket = matched if question.match(record) else unmatched
        bucket.append(record)
    return matched, unmatched

def partitionMoreLess(rows, question):
    """Split `rows` using `question.moreless` instead of `question.match`.

    Returns a (true_rows, false_rows) tuple of lists; each list keeps the
    rows in their original relative order.
    """
    accepted, rejected = [], []
    for record in rows:
        (accepted if question.moreless(record) else rejected).append(record)
    return accepted, rejected

In [8]:
def gini(rows):
    """Gini impurity of the labels in `rows`.

    Starts from 1 and subtracts the squared relative frequency of every
    label reported by class_counts. With no rows nothing is subtracted and
    1 is returned.
    """
    label_counts = class_counts(rows)
    result = 1
    for occurrences in label_counts.values():
        fraction = occurrences / float(len(rows))
        result -= fraction ** 2
    return result

In [9]:
def info_gain(left, right, current_uncertainty):
    """Impurity reduction achieved by splitting into `left` and `right`.

    The Gini impurity of each side is weighted by its share of the rows and
    subtracted from `current_uncertainty`. Raises ZeroDivisionError when
    both sides are empty.
    """
    n_left, n_right = len(left), len(right)
    left_share = float(n_left) / (n_left + n_right)
    return current_uncertainty - left_share * gini(left) - (1 - left_share) * gini(right)

In [10]:
def find_best_split(rows):
    """Find the question that yields the largest impurity reduction.

    Every (column, value) pair occurring in `rows` is tried as a Question;
    the last column is treated as the label and skipped. Candidate splits
    are evaluated with `partition`, i.e. with `Question.match`, which is the
    same test `build_tree` and `classify` apply. The gain reported here
    therefore belongs to the split that is actually performed (for numeric
    columns `match` and `moreless` are identical, so results on numeric
    data are unchanged).

    Returns:
        (best_gain, best_question). (0, None) is returned when no split
        improves on the current impurity or when `rows` is empty.
    """
    # Nothing to split; also avoids indexing rows[0] on an empty list.
    if not rows:
        return 0, None

    best_gain = 0
    best_question = None
    current_uncertainty = gini(rows)
    n_features = len(rows[0]) - 1  # last column holds the label

    for col in range(n_features):
        for val in unique_vals(rows, col):
            question = Question(col, val)
            true_rows, false_rows = partition(rows, question)

            # A split that leaves one side empty separates nothing.
            if not true_rows or not false_rows:
                continue

            gain = info_gain(true_rows, false_rows, current_uncertainty)

            # Strict comparison: on ties the first candidate found is kept.
            if gain > best_gain:
                best_gain, best_question = gain, question

    return best_gain, best_question

In [11]:
class Leaf:
    """Terminal node of the tree.

    `predictions` holds the dict returned by class_counts for the rows that
    reached this node (label -> number of rows).
    """

    def __init__(self, rows):
        self.predictions = class_counts(rows)

In [12]:
class Decision_Node:
    """Internal tree node: a question plus the two subtrees it leads to.

    `true_branch` is followed when the question matches a row and
    `false_branch` otherwise (see classify).
    """

    def __init__(self, question, true_branch, false_branch):
        self.question, self.true_branch, self.false_branch = (
            question, true_branch, false_branch)

In [13]:
def build_tree(rows):
    """Recursively grow a decision tree from `rows`.

    Asks find_best_split for the most useful question. When no question
    gives any gain the rows become a Leaf; otherwise the rows are split
    with `partition` and a subtree is built for each side.

    Returns a Leaf or a Decision_Node.
    """
    gain, question = find_best_split(rows)

    # No question reduces impurity: stop here and keep the label counts.
    if gain == 0:
        return Leaf(rows)

    matching, remaining = partition(rows, question)
    return Decision_Node(
        question,
        build_tree(matching),    # subtree for rows where the question holds
        build_tree(remaining),   # subtree for all other rows
    )

In [14]:
def print_tree(node, spacing=""):
    """Print the tree rooted at `node`, indenting each level by two spaces.

    A Leaf prints its label counts; any other node prints its question
    followed by the true and false subtrees.
    """
    if isinstance(node, Leaf):
        print(spacing + "Predict", node.predictions)
        return

    deeper = spacing + "  "
    print(spacing + str(node.question))
    for caption, child in (('--> True:', node.true_branch),
                           ('--> False:', node.false_branch)):
        print(spacing + caption)
        print_tree(child, deeper)

In [15]:
def classify(row, node):
    """Return the label counts of the leaf that `row` ends up in.

    Starting at `node`, follow the true branch whenever the node's question
    matches the row and the false branch otherwise, until a Leaf is
    reached; that leaf's `predictions` dict is returned.
    """
    current = node
    while not isinstance(current, Leaf):
        if current.question.match(row):
            current = current.true_branch
        else:
            current = current.false_branch
    return current.predictions

In [16]:
def print_leaf(counts):
    """Turn a leaf's label counts into percentage strings.

    Each count is divided by the total, scaled to 100, truncated with int()
    and suffixed with "%". Despite the name, nothing is printed; the
    resulting dict is returned.
    """
    total = float(sum(counts.values()))
    return {
        label: str(int(counts[label] / total * 100)) + "%"
        for label in counts
    }

In [17]:
# Grow a single tree from the hand-written training rows and print its
# structure.
my_tree = build_tree(training_data)
print_tree(my_tree)

Is temperature >= 18?
--> True:
  Is humidity >= 55?
  --> True:
    Is temperature >= 33?
    --> True:
      Predict {'Greenhouse': 15}
    --> False:
      Is temperature >= 25?
      --> True:
        Is humidity >= 64?
        --> True:
          Predict {'Kitchen': 6}
        --> False:
          Is temperature >= 30?
          --> True:
            Predict {'Kitchen': 3}
          --> False:
            Is humidity >= 62?
            --> True:
              Predict {'Greenhouse': 1, 'Kitchen': 1}
            --> False:
              Predict {'Greenhouse': 1}
      --> False:
        Is humidity >= 60?
        --> True:
          Predict {'Greenhouse': 8}
        --> False:
          Is temperature >= 23?
          --> True:
            Predict {'Kitchen': 1}
          --> False:
            Predict {'Livingroom': 1}
  --> False:
    Is temperature >= 29?
    --> True:
      Predict {'Kitchen': 8}
    --> False:
      Is temperature >= 23?
      --> True:
        Is humidity >= 4

In [18]:
# Labelled rows in the same [humidity, temperature, room] layout as the
# training data, used below to compare the tree's predictions with the
# actual room.
testing_data = [
    [6, -2, 'Freezer'],
    [58, 27, 'Livingroom'],
    [69, 48, 'Greenhouse'],
    [38, 18, 'Livingroom'],
    [56, 27, 'Kitchen'],
    [69, 46, 'Greenhouse'],
    [58, 27, 'Kitchen'],
    [17, -3, 'Freezer'],
    [42, 28, 'Livingroom'],
    [53, 29, 'Kitchen'],
    [95, 31, 'Greenhouse'],
    [14, -9, 'Freezer'],
    [12, -7, 'Freezer'],
    [37, 25, 'Livingroom'],
    [71, 32, 'Kitchen'],
    [92, 36, 'Greenhouse'],
    [8, -3, 'Freezer'],
    [24, 24, 'Livingroom'],
    [35, 23, 'Kitchen'],
    [63, 39, 'Greenhouse'],
]

In [19]:
# For every test row, print the actual room (last element) next to the
# percentage breakdown of the leaf the single tree assigns it to.
for row in testing_data:
    print ("Actual: %s. Predicted: %s" %
           (row[-1], print_leaf(classify(row, my_tree))))

Actual: Freezer. Predicted: {'Freezer': '100%'}
Actual: Livingroom. Predicted: {'Greenhouse': '100%'}
Actual: Greenhouse. Predicted: {'Greenhouse': '100%'}
Actual: Livingroom. Predicted: {'Livingroom': '100%'}
Actual: Kitchen. Predicted: {'Greenhouse': '100%'}
Actual: Greenhouse. Predicted: {'Greenhouse': '100%'}
Actual: Kitchen. Predicted: {'Greenhouse': '100%'}
Actual: Freezer. Predicted: {'Freezer': '100%'}
Actual: Livingroom. Predicted: {'Livingroom': '100%'}
Actual: Kitchen. Predicted: {'Kitchen': '100%'}
Actual: Greenhouse. Predicted: {'Kitchen': '100%'}
Actual: Freezer. Predicted: {'Freezer': '100%'}
Actual: Freezer. Predicted: {'Freezer': '100%'}
Actual: Livingroom. Predicted: {'Kitchen': '100%'}
Actual: Kitchen. Predicted: {'Kitchen': '100%'}
Actual: Greenhouse. Predicted: {'Greenhouse': '100%'}
Actual: Freezer. Predicted: {'Freezer': '100%'}
Actual: Livingroom. Predicted: {'Livingroom': '100%'}
Actual: Kitchen. Predicted: {'Livingroom': '100%'}
Actual: Greenhouse. Predicted: 

In [20]:
# Per-room value ranges read by GenerateNewData: "minheat"/"maxheat" bound
# the generated temperature, "minhum"/"maxhum" bound the generated humidity,
# and "label" becomes the row's class label.
rooms = [
    {"label": "Freezer", "minheat":-10.0, "maxheat": 10.0, "minhum": 5.7, "maxhum":40.9},
    {"label": "Kitchen", "minheat":15.65, "maxheat": 32.10, "minhum": 25, "maxhum":70.2},
    {"label": "Livingroom", "minheat":18.6, "maxheat": 28.0, "minhum": 30.65, "maxhum":65},
    {"label": "Greenhouse", "minheat":30.12, "maxheat": 55.6, "minhum": 38.7, "maxhum":80.98},
]

In [40]:
import random
import pandas as pd
def GenerateNewData(rooms, nPoints, rng=None):
    """Generate random labelled [humidity, temperature, label] rows.

    For every room a sample count is drawn from 1..nPoints (inclusive).
    Each sample takes a temperature between the room's "minheat" and
    "maxheat" and a humidity between its "minhum" and "maxhum". The total
    number of generated rows is printed before returning.

    Args:
        rooms: iterable of dicts with the keys "label", "minheat",
            "maxheat", "minhum" and "maxhum".
        nPoints: inclusive upper bound for the per-room sample count; must
            be at least 1, otherwise randint raises ValueError.
        rng: optional random.Random instance (or any object providing
            randint and uniform). Pass a seeded generator to obtain
            reproducible data. When omitted, the module-level functions of
            `random` are used, exactly as before.

    Returns:
        list of [humidity, temperature, label] rows, grouped by room in the
        order of `rooms`.
    """
    source = random if rng is None else rng
    data = []
    for room in rooms:
        n = source.randint(1, nPoints)
        for _ in range(n):
            # Temperature is drawn before humidity; the order matters for
            # reproducing a seeded stream.
            t = source.uniform(room["minheat"], room["maxheat"])
            h = source.uniform(room["minhum"], room["maxhum"])
            data.append([h, t, room["label"]])
    print(len(data))
    return data

# Draw between 1 and 111 rows per room, wrap them in a DataFrame and show
# how many rows each room received (column 2 holds the room label).
newdata = GenerateNewData(rooms,111)
newvalues = pd.DataFrame(newdata)
newvalues[2].value_counts()

194


Kitchen       92
Greenhouse    65
Freezer       32
Livingroom     5
Name: 2, dtype: int64

In [41]:
# Shuffle the rows (sample(frac=1) returns every row in random order) and
# renumber the index from 0. No random_state is passed, so the order differs
# between runs.
newvalues = newvalues.sample(frac=1).reset_index(drop=True) # Shuffles the DataFrame rows

In [42]:
def splitData(data, ntrees, ownvalue):
    """Recursively halve a DataFrame into 2**ntrees disjoint subsets.

    Each level samples half of the rows (fixed random_state=200) and takes
    the remaining rows as the other half; the halves are split again until
    `ntrees` levels have been applied. Despite its name, `ntrees` is the
    number of halving levels, not the number of subsets: ntrees=4 yields
    16 subsets.

    Args:
        data: pandas DataFrame to split. Rows are separated by index label
            (data.drop(sample.index)) -- presumably the index is unique;
            verify against the caller.
        ntrees: number of halving levels, at least 1.
        ownvalue: string tag extended with "a"/"b" on each recursion; it is
            not used for anything else and is kept for compatibility.

    Returns:
        list of DataFrames; the subsets derived from the first half and
        from the second half alternate in the list.

    Raises:
        ValueError: if ntrees is smaller than 1 (this previously recursed
            until RecursionError).
    """
    if ntrees < 1:
        raise ValueError("ntrees must be at least 1, got %r" % (ntrees,))

    splitOne = data.sample(frac=0.5, random_state=200)
    splitTwo = data.drop(splitOne.index)

    # Last level: hand back the two halves themselves.
    if ntrees == 1:
        return [splitOne, splitTwo]

    first_half = splitData(splitOne, ntrees - 1, ownvalue + "a")
    second_half = splitData(splitTwo, ntrees - 1, ownvalue + "b")

    # Interleave the results of both halves (they have equal length).
    subsets = []
    for from_first, from_second in zip(first_half, second_half):
        subsets.append(from_first)
        subsets.append(from_second)
    return subsets

In [43]:
# Four halving levels give 2**4 = 16 subsets; report how many there are,
# the size of the first one, and its contents.
subsets = splitData(newvalues,4,"a")
print("nSubsets:",len(subsets))
print("nValues/subset:",len(subsets[0]))
print(subsets[0])

nSubsets: 16
nValues/subset: 12
             0          1           2
118  54.500699  22.347950     Kitchen
59   74.332839  51.016560  Greenhouse
98   47.272201  54.111556  Greenhouse
39   49.168445  20.021334     Kitchen
28   54.529030  25.548711     Kitchen
132  59.277728  43.621841  Greenhouse
34   48.668624  36.612743  Greenhouse
104  59.454692  49.806813  Greenhouse
176  71.164943  36.463811  Greenhouse
77   36.441942  26.096728     Kitchen
156  47.767521  29.371124     Kitchen
162  67.653754  35.452831  Greenhouse


In [44]:
# Build one decision tree per subset, collect the trees in `forrest` and
# print each tree as it is built.
forrest = []
i = 1  # NOTE(review): incremented below but never read in this cell
for iSet in subsets:
    iList = iSet.values.tolist()  # DataFrame -> plain list of rows for build_tree
    i+=1
    tree = build_tree(iList)
    forrest.append(tree)
    print_tree(tree)

Is temperature >= 35.45283062099447?
--> True:
  Predict {'Greenhouse': 7}
--> False:
  Predict {'Kitchen': 5}
Is temperature >= 31.96365777510581?
--> True:
  Predict {'Greenhouse': 5}
--> False:
  Is temperature >= 17.583080888860756?
  --> True:
    Predict {'Kitchen': 5}
  --> False:
    Predict {'Freezer': 2}
Is humidity >= 47.34898388078318?
--> True:
  Is temperature >= 44.3054349285093?
  --> True:
    Predict {'Greenhouse': 4}
  --> False:
    Is humidity >= 56.357632676616646?
    --> True:
      Predict {'Kitchen': 3}
    --> False:
      Is humidity >= 53.64005736657371?
      --> True:
        Predict {'Greenhouse': 1}
      --> False:
        Predict {'Kitchen': 1}
--> False:
  Predict {'Freezer': 3}
Is humidity >= 36.140151874834025?
--> True:
  Is temperature >= 33.38845617907247?
  --> True:
    Predict {'Greenhouse': 3}
  --> False:
    Predict {'Kitchen': 6}
--> False:
  Predict {'Freezer': 3}
Is temperature >= 43.201228277221375?
--> True:
  Predict {'Greenhouse': 5

In [45]:
def predictionCounter(row,trees,labels):
    """Count, per label, how many trees include it in their prediction.

    Each tree classifies `row`; every label that is present in the returned
    leaf counts receives one vote from that tree, so a leaf holding several
    labels votes for all of them.

    Returns a list of vote counts aligned with `labels`.
    """
    votes = [0] * len(labels)
    for tree in trees:
        leaf_counts = classify(row, tree)
        for position, label in enumerate(labels):
            if leaf_counts.get(label) is not None:
                votes[position] += 1
    return votes

In [46]:
def rndForest(trees, data, labels=None):
    """Print the forest's majority vote for every row in `data`.

    For each row the votes per label are gathered with predictionCounter
    and one line is printed: the actual label (last element of the row),
    the label with the most votes, and that vote count out of the number
    of trees. On a tie the label listed first in `labels` wins.

    Args:
        trees: trees to consult.
        data: iterable of rows whose last element is the actual label.
        labels: candidate labels, in tie-breaking order. Defaults to the
            four rooms used in this notebook.
    """
    if labels is None:
        labels = ["Livingroom", "Kitchen", "Greenhouse", "Freezer"]

    for row in data:
        votes = predictionCounter(row, trees, labels)
        top = max(votes)
        # row[-1] is the label column, matching class_counts.
        print("Actual:", row[-1], labels[votes.index(top)], ":", top, "/", len(trees))

In [51]:
# Draw fresh test rows: with nPoints=1 the generator yields exactly one row
# per room. NOTE: this rebinds `testing_data`, which an earlier cell
# assigned to a hand-written list.
testing_data = GenerateNewData(rooms, 1)
# Left disabled: GenerateNewData returns a plain list, which has no .sample method.
#testing_data = testing_data.sample(frac=1).reset_index(drop=True) # Randomizes the array!

4


In [52]:
# Print the forest's majority vote next to the actual room for each test row.
rndForest(forrest,testing_data)

Actual: Freezer Freezer : 15 / 16
Actual: Kitchen Kitchen : 11 / 16
Actual: Livingroom Kitchen : 15 / 16
Actual: Greenhouse Greenhouse : 9 / 16
