In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import nltk

# Download the 'punkt' resource
nltk.download('punkt')
%matplotlib inline

ModuleNotFoundError: No module named 'seaborn'

In [None]:
df = pd.read_csv("smarthome.csv")

In [None]:
del df["Number"]
df

Unnamed: 0,Category,Action_needed,Question,Subcategory,Action,Time,Sentence
0,lights,1,0,kitchen,on,today,Illuminate the kitchen today.
1,lights,1,0,kitchen,on,tomorrow,Illuminate the kitchen tomorrow.
2,lights,1,0,kitchen,on,hour,Turn on the light in the kitchen in 10 hours.
3,lights,1,0,kitchen,on,day,Turn on the light in the kitchen in 1 day.
4,lights,1,0,diningroom,on,today,Illuminate the dining room today.
...,...,...,...,...,...,...,...
658,time,0,0,none,none,now,Time.
659,time,0,0,none,none,now,I don't want you to tell me the time.
660,time,1,0,none,none,hour,Tell me the time in an hour.
661,time,1,0,none,none,day,Tell me the time in a day.


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 663 entries, 0 to 662
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Category       663 non-null    object
 1   Action_needed  663 non-null    int64 
 2   Question       663 non-null    int64 
 3   Subcategory    663 non-null    object
 4   Action         663 non-null    object
 5   Time           663 non-null    object
 6   Sentence       663 non-null    object
dtypes: int64(2), object(5)
memory usage: 36.4+ KB


In [None]:
from nltk import word_tokenize
import itertools
import math

In [None]:
# Extract unique words from the DataFrame's 'Sentence' column
# and then print out the resulting vocabulary.

sentences = df['Sentence']
categories = df['Category']
subcategories = df['Subcategory']
actions = df['Action']

uniquecategories = list(set(categories))
uniquesubcategories = list(set(subcategories))
uniqueactions = list(set(actions))

mergesentences = list(itertools.chain.from_iterable([word_tokenize(sentence.lower()) for sentence in sentences]))
vocabulary = list(set(mergesentences))
print(vocabulary)

['time', 'six', 'which', 'first', 'put', 'north', 'leuven', 'tallest', 'species', 'dinin', 'freezing', 'busses', 'info', 'so', 'next', 'for', 'installed', 'humidity', 'shine', 'minutes', 'bus', 'cloudy', 'few', 'cofeemachine', 'on', 'viewer', 'i', 'greece', 'activity', 'music', 'get', 'centigrade', 'one', 'attic', 'mallorca', 'sick', 'coming', 'viewing', 'myplace', 'up', 'leave', 'strange', 'when', 'possible', 'two', 'bathroom', 'day', 'the', 'long', 'house', 'mouth', 'water', 'mean', 'hail', 'getting', 'cut', 'out', 'car', 'week', 'ten', '8', 'could', 'strangers', 'running', 'readings', 'responds', 'ca', 'falls', 'please', 'or', 'over', 'sensors', 'tomorrow', 'who', "'re", 'degrees', 'sandler', 'bored', 'many', 'unusual', 'operation', 'storming', 'bike', 'hit', 'app', 'something', 'alone', 'september', 'drowning', 'spain', 'dining', 'google', 'machine', 'minute', '.', 'make', 'great', 'locationtracker', 'anymore', 'down', 'is', "'s", 'a', 'basement', 'movies', 'hours', 'bright', 'near

In [None]:
# calculate word frequency in a sentence
def term_frequency(word, sentence):
    return sentence.split().count(word)

In [None]:
# calculate word frequency in vocabulary
def document_frequency(word):
    return vocabulary.count(word)

In [None]:
# lower weight for unimportant words
def inverse_document_frequency(word):
    return math.log(len(vocabulary) / (document_frequency(word) + 1))

In [None]:
# get term frequency inverse document frequency value
def calculate_tfidf(word, sentence):
    return term_frequency(word, sentence) * inverse_document_frequency(word)

In [None]:
# get one-hot encoded vectors for the targets
def one_hot_class_vector(uniqueclasses, w):
    emptyvector = [0 for i in range(len(uniqueclasses))]
    emptyvector[uniqueclasses.index(w)] = 1
    return emptyvector

In [None]:
# get one-hot encoded vectors for the words
def one_hot_vector(w):
    emptyvector = [0 for i in range(len(vocabulary))]
    emptyvector[vocabulary.index(w)] = 1
    return emptyvector

In [None]:
# get one-hot encdoded sentence vector
def sentence_vector(sentence, tfidf=False):
    tokenizedlist = word_tokenize(sentence.lower())
    sentencevector = [0 for i in range(len(vocabulary))]
    count = 0

    for word in tokenizedlist:
        if word in vocabulary:
            count = count + 1
            if tfidf:
                sentencevector = [x + y for x, y in zip(sentencevector, [e * calculate_tfidf(word, sentence) for e in one_hot_vector(word)])]
            else:
                sentencevector = [x + y for x, y in zip(sentencevector, one_hot_vector(word))]

    if count == 0:
        return sentencevector
    else:
        return [(el / count) for el in sentencevector]

In [None]:
# wordvectors = [one_hot_vector(w) for w in vocabulary]
categoryvectors = [cv.index(1) for cv in [one_hot_class_vector(uniquecategories, w) for w in categories]]
subcategoryvectors = [cv.index(1) for cv in [one_hot_class_vector(uniquesubcategories, w) for w in subcategories]]
actionvectors = [cv.index(1) for cv in [one_hot_class_vector(uniqueactions, w) for w in actions]]
sentencevectors = [sentence_vector(sentence) for sentence in sentences]
sentencevectorstfidf = [sentence_vector(sentence, True) for sentence in sentences]

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Splitting data into training and testing sets for category prediction
X_train_cat, X_test_cat, y_train_cat, y_test_cat = train_test_split(sentencevectors, categoryvectors, test_size=0.25, random_state=42)

# Splitting data into training and testing sets for category prediction with TF-IDF vectors
X_train_cat_tfidf, X_test_cat_tfidf, y_train_cat_tfidf, y_test_cat_tfidf = train_test_split(sentencevectorstfidf, categoryvectors, test_size=0.25, random_state=42)

# Splitting data into training and testing sets for subcategory prediction
X_train_subcat, X_test_subcat, y_train_subcat, y_test_subcat = train_test_split(sentencevectors, subcategoryvectors, test_size=0.25, random_state=42)

# Splitting data into training and testing sets for action prediction
X_train_action, X_test_action, y_train_action, y_test_action = train_test_split(sentencevectors, actionvectors, test_size=0.25, random_state=42)

In [None]:
def train_fit(model_name, model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    return model

In [None]:
# Create MLPClassifier model
mlp_max_iter_model = MLPClassifier(max_iter=10000)

# Train and fit the model
mlp_max_iter_model.fit(X_train_cat, y_train_cat)

In [None]:
# Initialize MLPClassifier model with maximum iterations set to 10000
mlp_max_iter_model_cat = MLPClassifier(max_iter=10000)

# Train and fit the MLPClassifier model for category prediction
mlp_max_iter_model_cat = train_fit("MLPClassifier", mlp_max_iter_model_cat, X_train_cat, y_train_cat, X_test_cat, y_test_cat)

# Initialize another MLPClassifier model with maximum iterations set to 10000 for subcategory prediction
mlp_max_iter_model_subcat = MLPClassifier(max_iter=10000)

# Train and fit the MLPClassifier model for subcategory prediction
mlp_max_iter_model_subcat = train_fit("MLPClassifier", mlp_max_iter_model_subcat, X_train_subcat, y_train_subcat, X_test_subcat, y_test_subcat)

# Initialize another MLPClassifier model with maximum iterations set to 10000 for action prediction
mlp_max_iter_model_action = MLPClassifier(max_iter=10000)

# Train and fit the MLPClassifier model for action prediction
mlp_max_iter_model_action = train_fit("MLPClassifier", mlp_max_iter_model_action, X_train_action, y_train_action, X_test_action, y_test_action)

In [None]:
# Calculate accuracy for category prediction
accuracy_cat = mlp_max_iter_model_cat.score(X_test_cat, y_test_cat)
print("Accuracy for category prediction:", accuracy_cat)

# Calculate accuracy for subcategory prediction
accuracy_subcat = mlp_max_iter_model_subcat.score(X_test_subcat, y_test_subcat)
print("Accuracy for subcategory prediction:", accuracy_subcat)

# Calculate accuracy for action prediction
accuracy_action = mlp_max_iter_model_action.score(X_test_action, y_test_action)
print("Accuracy for action prediction:", accuracy_action)


Accuracy for category prediction: 0.8975903614457831
Accuracy for subcategory prediction: 0.8674698795180723
Accuracy for action prediction: 0.8975903614457831


In [None]:
def predict(model, classes, sentence):
    y_preds = model.predict([sentence_vector(sentence)])
    return classes[y_preds[0]]

In [None]:
sentence = "Turn the lights off in the kitchen."
print(predict(mlp_max_iter_model, uniquecategories, sentence))
print(predict(mlp_max_iter_model_subcat, uniquesubcategories, sentence))
print(predict(mlp_max_iter_model_action, uniqueactions, sentence))

lights
kitchen
off


In [None]:
sentence = "Turn on the library lights."
print(predict(mlp_max_iter_model, uniquecategories, sentence))
print(predict(mlp_max_iter_model_subcat, uniquesubcategories, sentence))
print(predict(mlp_max_iter_model_action, uniqueactions, sentence))

lights
library
on


In [None]:
sentence = "Close the garage door."
print(predict(mlp_max_iter_model, uniquecategories, sentence))
print(predict(mlp_max_iter_model_subcat, uniquesubcategories, sentence))
print(predict(mlp_max_iter_model_action, uniqueactions, sentence))

garagedoor
garage
down


In [None]:
!pip install SpeechRecognition
!pip install pyaudio


Collecting pyaudio
  Downloading PyAudio-0.2.14.tar.gz (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: pyaudio
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for pyaudio [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for pyaudio (pyproject.toml) ... [?25l[?25herror
[31m  ERROR: Failed building wheel for pyaudio[0m[31m
[0mFailed to build pyaudio
[31mERROR: Could not build wheels for pyaudio, which is required to install pypr

In [None]:
import speech_recognition


In [None]:

# Initialize the recognizer
recognizer = speech_recognition.Recognizer()

# Get the microphone
microphone = speech_recognition.Microphone()

# Set the recognition threshold
recognizer.pause_threshold = 3

# Start listening
with microphone as source:
    print("Listening...")
    audio = recognizer.listen(source)

# Recognize the speech
try:
    recognized_text = recognizer.recognize_google(audio)
    print("You said:", recognized_text)
except speech_recognition.UnknownValueError:
    print("Sorry, I couldn't understand what you said.")
except speech_recognition.RequestError as e:
    print("Error:", e)

AttributeError: Could not find PyAudio; check installation