<a href="https://colab.research.google.com/github/francis-100/ai_bootcamp/blob/main/Smarthome.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import nltk

# Download the 'punkt' resource
nltk.download('punkt')
%matplotlib inline

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [2]:
df = pd.read_csv("smarthome.csv")

In [3]:
del df["Number"]
df

Unnamed: 0,Category,Action_needed,Question,Subcategory,Action,Time,Sentence
0,lights,1,0,kitchen,on,today,Illuminate the kitchen today.
1,lights,1,0,kitchen,on,tomorrow,Illuminate the kitchen tomorrow.
2,lights,1,0,kitchen,on,hour,Turn on the light in the kitchen in 10 hours.
3,lights,1,0,kitchen,on,day,Turn on the light in the kitchen in 1 day.
4,lights,1,0,diningroom,on,today,Illuminate the dining room today.
...,...,...,...,...,...,...,...
658,time,0,0,none,none,now,Time.
659,time,0,0,none,none,now,I don't want you to tell me the time.
660,time,1,0,none,none,hour,Tell me the time in an hour.
661,time,1,0,none,none,day,Tell me the time in a day.


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 663 entries, 0 to 662
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Category       663 non-null    object
 1   Action_needed  663 non-null    int64 
 2   Question       663 non-null    int64 
 3   Subcategory    663 non-null    object
 4   Action         663 non-null    object
 5   Time           663 non-null    object
 6   Sentence       663 non-null    object
dtypes: int64(2), object(5)
memory usage: 36.4+ KB


In [5]:
from nltk import word_tokenize
from sklearn.model_selection import train_test_split
import itertools
import math

In [8]:
sentences = df['Sentence']
categories = df['Category']
subcategories = df['Subcategory']
actions = df['Action']

uniquecategories = list(set(categories))
uniquesubcategories = list(set(subcategories))
uniqueactions = list(set(actions))

mergesentences = list(itertools.chain.from_iterable([word_tokenize(sentence.lower()) for sentence in sentences]))
vocabulary = list(set(mergesentences))
print(vocabulary)

['garage', 'certain', 'already', 'turn', 'time', 'spain', 'in', 'of', 'now', 'minutes', 'does', 'home', 'unusual', 'considered', 'other', 'potatoes', 'be', 'room', 'put', 'and', '112', 'library', 'drug', 'tot', 'appliance', 'trains', 'readings', 'brighter', 'players', 'this', 'can', 'lately', 'internet', 'leave', 'some', 'electricity', 'falls', "'re", 'facing', 'feel', 'cut', 'myplace', 'keep', 'bieber', 'live', 'storming', 'at', 'toast', 'departure', 'like', 'if', 'strange', 'motion', 'less', 'gradually', 'were', 'fog', 'clock', 'where', 'music', 'things', 'before', '11', 'drowning', 'eighteen', "n't", 'religions', 'bathroom', 'belgium', 'will', 'the', 'humidity', 'days', 'today', 'nothing', 'antwerp', 'sad', 'antwerpen', 'machine', 'hard', 'viewer', 'getting', 'would', 'cold', 'could', 'language', 'pixels', 'reading', 'maps', 'there', 'when', 'north', 'living', 'say', 'facebook', 'balen', 'with', 'place', 'alive', 'transport', 'movement', 'rooms', 'mean', 'tallest', 'all', 'use', 'li

In [9]:
# calculates how often the word appears in the sentence
def term_frequency(word, sentence):
    return sentence.split().count(word)

In [10]:
# calculates how often the word appears in the entire vocabulary
def document_frequency(word):
    return vocabulary.count(word)

In [11]:
# will make sure that unimportant words such as "and" that occur often will have lower weights
# log taken to avoid exploding of IDF with words such as 'is' that can occur a lot
def inverse_document_frequency(word):
    return math.log(len(vocabulary) / (document_frequency(word) + 1))

In [12]:
# get term frequency inverse document frequency value
def calculate_tfidf(word, sentence):
    return term_frequency(word, sentence) * inverse_document_frequency(word)

In [13]:
# get one-hot encoded vectors for the targets
def one_hot_class_vector(uniqueclasses, w):
    emptyvector = [0 for i in range(len(uniqueclasses))]
    emptyvector[uniqueclasses.index(w)] = 1
    return emptyvector

In [14]:
# get one-hot encoded vectors for the words
def one_hot_vector(w):
    emptyvector = [0 for i in range(len(vocabulary))]
    emptyvector[vocabulary.index(w)] = 1
    return emptyvector

In [15]:
# get one-hot encdoded sentence vector
def sentence_vector(sentence, tfidf=False):
    tokenizedlist = word_tokenize(sentence.lower())
    sentencevector = [0 for i in range(len(vocabulary))]
    count = 0

    for word in tokenizedlist:
        if word in vocabulary:
            count = count + 1
            if tfidf:
                sentencevector = [x + y for x, y in zip(sentencevector, [e * calculate_tfidf(word, sentence) for e in one_hot_vector(word)])]
            else:
                sentencevector = [x + y for x, y in zip(sentencevector, one_hot_vector(word))]

    if count == 0:
        return sentencevector
    else:
        return [(el / count) for el in sentencevector]

In [16]:
# wordvectors = [one_hot_vector(w) for w in vocabulary] # not needed
categoryvectors = [cv.index(1) for cv in [one_hot_class_vector(uniquecategories, w) for w in categories]]
subcategoryvectors = [cv.index(1) for cv in [one_hot_class_vector(uniquesubcategories, w) for w in subcategories]]
actionvectors = [cv.index(1) for cv in [one_hot_class_vector(uniqueactions, w) for w in actions]]
sentencevectors = [sentence_vector(sentence) for sentence in sentences]
sentencevectorstfidf = [sentence_vector(sentence, True) for sentence in sentences]

In [17]:
X_train_cat, X_test_cat, y_train_cat, y_test_cat = train_test_split(sentencevectors, categoryvectors, test_size=0.25, random_state=42)
X_train_cat_tfidf, X_test_cat_tfidf, y_train_cat_tfidf, y_test_cat_tfidf = train_test_split(sentencevectorstfidf, categoryvectors, test_size=0.25, random_state=42)
X_train_subcat, X_test_subcat, y_train_subcat, y_test_subcat = train_test_split(sentencevectors, subcategoryvectors, test_size=0.25, random_state=42)
X_train_action, X_test_action, y_train_action, y_test_action = train_test_split(sentencevectors, actionvectors, test_size=0.25, random_state=42)

In [28]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [24]:
def train_fit(model_name, model, X_train, y_train, X_test, y_test):
    """
    Train and fit the specified model with the given training data and labels,
    and evaluate its performance using the provided test data and labels.

    Args:
    - model_name: A string specifying the name of the model.
    - model: The model object (e.g., MLPClassifier) to train and fit.
    - X_train: The training data.
    - y_train: The training labels.
    - X_test: The test data.
    - y_test: The test labels.

    Returns:
    - The trained and fitted model.
    """
    model.fit(X_train, y_train)
    # Add any evaluation or performance metrics you need here
    return model


In [25]:
mlp_max_iter_model_cat = MLPClassifier(max_iter=10000)
mlp_max_iter_model_cat = train_fit("MLPClassifier", mlp_max_iter_model_cat, X_train_cat, y_train_cat, X_test_cat, y_test_cat)
mlp_max_iter_model_subcat = MLPClassifier(max_iter=10000)
mlp_max_iter_model_subcat = train_fit("MLPClassifier", mlp_max_iter_model_subcat, X_train_subcat, y_train_subcat, X_test_subcat, y_test_subcat)
mlp_max_iter_model_action = MLPClassifier(max_iter=10000)
mlp_max_iter_model_action = train_fit("MLPClassifier", mlp_max_iter_model_action, X_train_action, y_train_action, X_test_action, y_test_action)

In [29]:
# Create MLPClassifier model
mlp_max_iter_model = MLPClassifier(max_iter=10000)

# Train and fit the model
mlp_max_iter_model.fit(X_train_cat, y_train_cat)

In [30]:
def predict(model, classes, sentence):
    y_preds = model.predict([sentence_vector(sentence)])
    return classes[y_preds[0]]

In [31]:
sentence = "Hi Google, please turn off the lights."
print(predict(mlp_max_iter_model, uniquecategories, sentence))
print(predict(mlp_max_iter_model_subcat, uniquesubcategories, sentence))
print(predict(mlp_max_iter_model_action, uniqueactions, sentence))

lights
none
off


In [32]:
sentence = "Turn the lights off in the kitchen."
print(predict(mlp_max_iter_model, uniquecategories, sentence))
print(predict(mlp_max_iter_model_subcat, uniquesubcategories, sentence))
print(predict(mlp_max_iter_model_action, uniqueactions, sentence))

lights
kitchen
off


In [33]:
sentence = "Lower the door."
print(predict(mlp_max_iter_model, uniquecategories, sentence))
print(predict(mlp_max_iter_model_subcat, uniquesubcategories, sentence))
print(predict(mlp_max_iter_model_action, uniqueactions, sentence))

shutters
garage
down
