# **Load Data**

In [1]:
# Download the train / validation / test CSV splits from Google Drive (one
# file ID per split) and save each under a fixed name in the working directory.
# NOTE(review): recent gdown releases reportedly treat `--id` as deprecated and
# accept the file ID positionally — confirm against the installed gdown version.
!gdown --id 1OlcvGWReJMuyYQuOZm149vHWwPtlboR6 --output train.csv
!gdown --id 1Oi5cRlTybuIF2Fl5Bfsr-KkqrXrdt77w --output valid.csv
!gdown --id 1ep9H6-HvhB4utJRLVcLzieWNUSG3P_uF --output test.csv
     

Downloading...
From: https://drive.google.com/uc?id=1OlcvGWReJMuyYQuOZm149vHWwPtlboR6
To: /content/train.csv
100% 799k/799k [00:00<00:00, 48.8MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Oi5cRlTybuIF2Fl5Bfsr-KkqrXrdt77w
To: /content/valid.csv
100% 43.3k/43.3k [00:00<00:00, 6.91MB/s]
Downloading...
From: https://drive.google.com/uc?id=1ep9H6-HvhB4utJRLVcLzieWNUSG3P_uF
To: /content/test.csv
100% 43.1k/43.1k [00:00<00:00, 36.0MB/s]


# **Import Packages**

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn import linear_model
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import f1_score

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from scipy.sparse import csr_matrix

%matplotlib inline

# **Read Data**

In [3]:
# Load the three dataset splits from "<split>.csv" in the working directory.
# The generator yields the frames in the same order the names are unpacked.
train, valid, test = (
    pd.read_csv(f"{split}.csv") for split in ("train", "valid", "test")
)
     

In [None]:
# Peek at the first five rows of the test split to check its columns.
test.head(n=5)

Unnamed: 0,text,intent
0,add sabrina salerno to the grime instrumentals...,AddToPlaylist
1,i want to bring four people to a place that s ...,BookRestaurant
2,put lindsey cardinale into my hillary clinton ...,AddToPlaylist
3,will it snow in mt on june 13 2038,GetWeather
4,play signe anderson chant music that is newest,PlayMusic


In [4]:
# Number of distinct utterances per intent in the training split
# (duplicate texts within an intent are counted once).
train['text'].groupby(train['intent']).nunique()

intent
AddToPlaylist           1812
BookRestaurant          1876
GetWeather              1885
PlayMusic               1884
RateBook                1822
SearchCreativeWork      1844
SearchScreeningEvent    1736
Name: text, dtype: int64

# **Feature Engineering**

In [5]:
# Inputs and labels. Despite the "*Clean" names, the raw `text` column is used
# as-is; no extra cleaning step is applied in this cell.
trainClean, testClean = train['text'], test['text']
y_train, y_test = train['intent'], test['intent']

# Word-level TF-IDF over unigrams and bigrams, capped at 1024 features.
# min_df=0.0 / max_df=1.0 leave the document-frequency bounds fully open.
vectorizer = TfidfVectorizer(
    analyzer='word',
    ngram_range=(1, 2),
    max_features=1024,
    min_df=0.0,
    max_df=1.0,
)

# Fit on the training texts only, so the test split is encoded with the
# vocabulary and IDF weights learned from train.
vec = vectorizer.fit(trainClean)
X_train, X_test = (vec.transform(texts) for texts in (trainClean, testClean))

# **KNN** 

In [6]:
# Distance-weighted 5-nearest-neighbour classifier (Minkowski metric, p=2)
# trained on the TF-IDF features and scored on the test split.
neigh = KNeighborsClassifier(p=2, weights="distance", n_neighbors=5)
neigh_train = neigh.fit(X_train, y_train)  # fit() returns the fitted estimator
y_pred = neigh_train.predict(X_test)

knn_acc = accuracy_score(y_test, y_pred)
print("Multi-class accuracy:", knn_acc, "\n")
print(classification_report(y_test, y_pred))

Multi-class accuracy: 0.9342857142857143 

                      precision    recall  f1-score   support

       AddToPlaylist       0.95      0.98      0.96       124
      BookRestaurant       0.96      0.98      0.97        92
          GetWeather       0.96      0.96      0.96       104
           PlayMusic       0.91      0.85      0.88        86
            RateBook       0.99      0.99      0.99        80
  SearchCreativeWork       0.82      0.90      0.86       107
SearchScreeningEvent       0.98      0.88      0.93       107

            accuracy                           0.93       700
           macro avg       0.94      0.93      0.93       700
        weighted avg       0.94      0.93      0.93       700



# **Naive Bayes**

In [7]:
# Gaussian Naive Bayes on the TF-IDF features. The sparse matrices are
# densified first — presumably because GaussianNB does not take sparse input.
clf = GaussianNB().fit(X_train.toarray(), y_train)
y_pred = clf.predict(X_test.toarray())

nb_acc = accuracy_score(y_test, y_pred)
print("Multi-class accuracy:", nb_acc, "\n")
print(classification_report(y_test, y_pred))



Multi-class accuracy: 0.9242857142857143 

                      precision    recall  f1-score   support

       AddToPlaylist       0.88      0.98      0.93       124
      BookRestaurant       1.00      0.99      0.99        92
          GetWeather       0.98      1.00      0.99       104
           PlayMusic       0.90      0.84      0.87        86
            RateBook       0.97      0.96      0.97        80
  SearchCreativeWork       0.89      0.77      0.82       107
SearchScreeningEvent       0.87      0.93      0.90       107

            accuracy                           0.92       700
           macro avg       0.93      0.92      0.92       700
        weighted avg       0.92      0.92      0.92       700



# **SVM**

In [8]:
# Linear-kernel support vector classifier with C=10, trained on the densified
# TF-IDF features and scored on the test split.
# NOTE(review): SVC is documented to accept sparse input, so the .toarray()
# calls may be avoidable — confirm results are unchanged before dropping them.
clf = SVC(C=10, kernel="linear").fit(X_train.toarray(), y_train)
y_pred = clf.predict(X_test.toarray())

svm_acc = accuracy_score(y_test, y_pred)
print("Multi-class accuracy:", svm_acc, "\n")
print(classification_report(y_test, y_pred))

Multi-class accuracy: 0.9685714285714285 

                      precision    recall  f1-score   support

       AddToPlaylist       0.99      1.00      1.00       124
      BookRestaurant       0.97      0.99      0.98        92
          GetWeather       0.98      0.98      0.98       104
           PlayMusic       0.95      0.98      0.97        86
            RateBook       1.00      1.00      1.00        80
  SearchCreativeWork       0.90      0.95      0.93       107
SearchScreeningEvent       0.99      0.89      0.94       107

            accuracy                           0.97       700
           macro avg       0.97      0.97      0.97       700
        weighted avg       0.97      0.97      0.97       700



# **Random Forest**

In [9]:
# Random forest of 200 trees on the densified TF-IDF features.
# random_state is fixed so that Restart & Run All rebuilds the same forest and
# prints the same metrics; without it every run bootstraps and splits
# differently, so the reported score drifts between executions.
# (Scores recorded from an earlier unseeded run may differ slightly.)
clf = RandomForestClassifier(n_estimators=200, random_state=42)

clf.fit(X_train.toarray(), y_train)
y_pred = clf.predict(X_test.toarray())

print("Multi-class accuracy:", accuracy_score(y_test, y_pred), "\n")
print(classification_report(y_test, y_pred))

Multi-class accuracy: 0.9714285714285714 

                      precision    recall  f1-score   support

       AddToPlaylist       0.99      1.00      1.00       124
      BookRestaurant       0.98      1.00      0.99        92
          GetWeather       0.98      0.98      0.98       104
           PlayMusic       0.96      0.99      0.97        86
            RateBook       1.00      0.99      0.99        80
  SearchCreativeWork       0.90      0.95      0.93       107
SearchScreeningEvent       1.00      0.90      0.95       107

            accuracy                           0.97       700
           macro avg       0.97      0.97      0.97       700
        weighted avg       0.97      0.97      0.97       700

