In [1]:
import numpy as np
import pandas as pd

from helper_funcs import report_f1_results, report_accuracy, split_data

# Read in our feature matrices and labels

In [2]:
# Load the pre-computed bag-of-words features from a text file into a NumPy array.
# NOTE(review): presumably one row per example, aligned with `labels` -- confirm against the feature-extraction step.
bow_feature_matrix = np.loadtxt("./features/bag_of_words.out")

In [3]:
# Load the pre-computed bigram features from a text file into a NumPy array.
# NOTE(review): presumably one row per example, aligned with `labels` -- confirm against the feature-extraction step.
bigram_feature_matrix = np.loadtxt("./features/bigrams.out")

In [4]:
# Load the pre-computed TF-IDF features from a text file into a NumPy array.
# NOTE(review): presumably one row per example, aligned with `labels` -- confirm against the feature-extraction step.
tfidf_feature_matrix = np.loadtxt("./features/tfidf.out")

In [5]:
# Load the pre-computed averaged Word2Vec features from a text file into a NumPy array.
# NOTE(review): presumably one row per example, aligned with `labels` -- confirm against the feature-extraction step.
w2v_feature_matrix = np.loadtxt("./features/word2vec_averaged.out")

In [6]:
# Load the class labels; np.loadtxt yields floats by default, so downstream code casts where integers are needed.
labels = np.loadtxt("./features/labels.out")

# Vanilla Perceptron Model

### 1. Write the training function

In [7]:
def vanilla_perceptron_train(features, labels, max_iters=100):
    """Train a perceptron with the classic mistake-driven update rule.

    The examples are visited in the order given, ``max_iters`` times. Whenever
    ``y * (w . x + b)`` is not strictly positive the example counts as a
    mistake: ``y * x`` is added to the weights and ``y`` to the bias.

    Args:
        features: Sequence of equal-length feature vectors, one per example.
        labels: Sequence of labels aligned with ``features``.
            NOTE(review): the update rule presumes labels in {-1, +1} --
            confirm against the caller.
        max_iters: Number of full passes over the data.

    Returns:
        Tuple ``(w, b)``: the learned weight vector and the bias.
    """
    n_features = len(features[0])
    weights = np.zeros(shape=(n_features))   # start from the zero vector
    bias = 0

    for idx in range(max_iters):
        # Progress indicator, rewritten in place via the carriage return
        print(f" Iteration: [{idx + 1} / {max_iters}]\r", end='', flush=True)

        for example, target in zip(features, labels):
            activation = np.dot(weights, example) + bias
            if target * activation <= 0:
                # Mistake (or zero margin): nudge the hyperplane toward the example
                step = example if target == 1 else example * target
                weights = np.add(weights, step)
                bias = bias + target

    print("\n")

    return weights, bias

# Averaged Perceptron Model

### 1. Write the training function

In [8]:
def averaged_perceptron_train(features, labels, max_iters=100):
    """Train an averaged perceptron using cached (counter-weighted) updates.

    Alongside the ordinary weights and bias, a second "cached" copy
    accumulates every update scaled by the running example counter. The
    returned parameters are ``w - w_cached / ctr`` and ``b - b_cached / ctr``,
    where ``ctr`` is one more than the total number of examples visited.

    Args:
        features: Sequence of equal-length feature vectors, one per example.
        labels: Sequence of labels aligned with ``features``.
            NOTE(review): the update rule presumes labels in {-1, +1} --
            confirm against the caller.
        max_iters: Number of full passes over the data.

    Returns:
        Tuple ``(w, b)``: the averaged weight vector and the averaged bias.
    """
    n_features = len(features[0])

    weights = np.zeros(shape=(n_features))
    cached_weights = np.zeros(shape=(n_features))
    bias = 0
    cached_bias = 0
    counter = 1   # advanced once per example, whether or not an update happens

    for idx in range(max_iters):
        print(f" Iteration: [{idx + 1} / {max_iters}]\r", end='', flush=True)

        for example, target in zip(features, labels):
            margin = target * (np.dot(weights, example) + bias)
            if margin <= 0:
                # Regular perceptron update
                weights = np.add(weights, (target * example))
                bias = bias + target

                # The same update, weighted by how far into training we are
                scale = target * counter
                cached_weights = np.add(cached_weights, (scale * example))
                cached_bias = cached_bias + scale

            counter = counter + 1

    print("\n")

    # Combine the running and cached parameters into the averaged solution
    inverse_counter = 1 / counter
    averaged_weights = weights - (inverse_counter * cached_weights)
    averaged_bias = bias - (inverse_counter * cached_bias)
    return averaged_weights, averaged_bias

# sklearn Perceptron Model

In [9]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

### 1. Write the training function

In [10]:
def sklearn_perceptron_train(features, labels, max_iter=100000, learning_rate=0.01):
    """Fit a scikit-learn ``Perceptron`` classifier and return it.

    Args:
        features: Training feature matrix, passed unchanged to ``fit``.
        labels: Training labels. They are converted to an integer array
            before fitting, so the fitted model predicts integer classes.
        max_iter: Upper bound on training passes, forwarded to ``Perceptron``.
            Defaults to the previously hard-coded 100000.
        learning_rate: Forwarded to ``Perceptron`` as ``eta0``. Defaults to
            the previously hard-coded 0.01.

    Returns:
        The fitted ``Perceptron`` instance.
    """
    perceptron = Perceptron(max_iter=max_iter,
                            eta0=learning_rate)

    # np.asarray lets plain lists/tuples through as well as ndarrays
    int_labels = np.asarray(labels).astype('int')
    perceptron.fit(features, int_labels)

    return perceptron

### 2. Write the testing function

In [11]:
def sklearn_perceptron_test(features, labels, model, feature_type="No Feature Type Provided"):
    """Score a fitted Perceptron on the given data and report its accuracy.

    Only accuracy is reported; per-class precision/recall/F1 reporting is not
    performed by this function.

    Args:
        features: Feature matrix to predict on.
        labels: True labels aligned with ``features``.
        model: Fitted estimator exposing ``predict``.
        feature_type: Human-readable feature-set name used in the report line.
    """
    predicted = model.predict(features)
    expected = list(labels)
    score = accuracy_score(expected, predicted)
    report_accuracy(score, feature_type, "sklearn Perceptron")

# sklearn SVM Model

In [12]:
from sklearn.svm import LinearSVC, SVC

### 1. Write the training function

In [13]:
def sklearn_svm_train(features, labels):
    """Fit a ``LinearSVC`` with default settings and return the fitted model.

    Args:
        features: Training feature matrix.
        labels: Training labels aligned with ``features`` (passed through as-is).

    Returns:
        The fitted ``LinearSVC`` instance.
    """
    classifier = LinearSVC()
    classifier.fit(features, labels)
    return classifier

### 2. Write the testing function

In [14]:
def sklearn_svm_test(features, labels, model, feature_type="No Feature Type Provided"):
    """Score a fitted SVM on the given data and report its accuracy.

    Only accuracy is reported; per-class precision/recall/F1 reporting is not
    performed by this function.

    Args:
        features: Feature matrix to predict on.
        labels: True labels aligned with ``features``.
        model: Fitted estimator exposing ``predict``.
        feature_type: Human-readable feature-set name used in the report line.
    """
    predicted = model.predict(features)
    expected = list(labels)
    score = accuracy_score(expected, predicted)
    report_accuracy(score, feature_type, "sklearn SVM")

# Feedforward Neural Network Model

In [15]:
import torch
from torch.utils.data import DataLoader, Dataset
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F

### 1. Create our Testing and Training classes that inherit from Dataset

In [16]:
class TrainDataset(Dataset):
    """In-memory dataset that yields ``(feature_vector, label)`` pairs.

    Features are held as a float32 tensor in ``self.x`` and labels as an int8
    tensor in ``self.y``; ``self.n_samples`` is the number of feature rows.
    """

    def __init__(self, features, labels):
        # Convert the NumPy inputs to the dtypes used for storage, then wrap as tensors
        feature_array = features.astype(np.float32)
        label_array = labels.astype(np.int8)

        self.x = torch.from_numpy(feature_array)
        self.y = torch.from_numpy(label_array)
        self.n_samples = self.x.size(0)

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        # One training example: its features together with its label
        sample = self.x[index]
        label = self.y[index]
        return sample, label

In [17]:
class TestDataset(Dataset):
    """In-memory dataset that yields feature vectors only.

    Labels are converted and kept in ``self.y`` (int8) but are not returned
    by ``__getitem__``; indexing produces just the float32 feature row.
    """

    def __init__(self, features, labels):
        # Convert the NumPy inputs to the dtypes used for storage, then wrap as tensors
        feature_array = features.astype(np.float32)
        label_array = labels.astype(np.int8)

        self.x = torch.from_numpy(feature_array)
        self.y = torch.from_numpy(label_array)
        self.n_samples = self.x.size(0)

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        # Features only -- the label is intentionally left out of the item
        sample = self.x[index]
        return sample

### 2. Define the network architecture

In [18]:
class Net(nn.Module):
    """Feedforward classifier: input -> 50 -> 10 -> output, with ReLU and dropout.

    Dropout (p=0.2) is applied after each hidden layer. The forward pass
    returns the raw outputs of the last linear layer (no softmax).

    Args:
        n_input_nodes: Size of each input feature vector.
        n_output_nodes: Number of output units. When omitted, it is taken as
            the number of distinct values in the notebook-level ``labels``
            array, looked up when the network is constructed.
    """

    def __init__(self, n_input_nodes, n_output_nodes=None):
        super().__init__()

        if n_output_nodes is None:
            # Resolve the default lazily: a default expression in the signature
            # would be evaluated once at class-definition time, which fails if
            # `labels` is not loaded yet and goes stale if it is reloaded.
            n_output_nodes = len(set(labels))

        n_hidden_nodes_1 = 50
        n_hidden_nodes_2 = 10

        # (input -> hidden_1)
        self.fc1 = nn.Linear(n_input_nodes, n_hidden_nodes_1)

        # (hidden_1 -> hidden_2)
        self.fc2 = nn.Linear(n_hidden_nodes_1, n_hidden_nodes_2)

        # (hidden_2 -> output)
        self.fc3 = nn.Linear(n_hidden_nodes_2, n_output_nodes)

        # dropout layer to reduce overfitting (only active in training mode)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Run a batch of feature vectors through the network and return the output-layer values."""
        # first hidden layer: linear -> ReLU -> dropout
        x = self.dropout(F.relu(self.fc1(x)))

        # second hidden layer: linear -> ReLU -> dropout
        x = self.dropout(F.relu(self.fc2(x)))

        # output layer, left unnormalised
        return self.fc3(x)

### 3. Write the training function

In [19]:
def train_fnn(model, train_loader, valid_loader, n_epochs=50, lr=0.01,
              model_path='./models/FNN_model.pt'):
    """Train ``model`` with SGD and checkpoint the best-validating weights.

    Each epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``valid_loader``. Whenever the mean validation
    loss is less than or equal to the best seen so far, the model's
    ``state_dict`` is written to ``model_path``. The model is left in eval
    mode, holding the weights of the final epoch (not necessarily the
    checkpointed ones).

    Args:
        model: ``nn.Module`` producing one score per class for each input row.
        train_loader: Iterable of ``(data, target)`` training batches.
        valid_loader: Iterable of ``(data, target)`` validation batches.
        n_epochs: Number of passes over the training data.
        lr: SGD learning rate.
        model_path: Destination of the checkpoint file; its parent directory
            is created if it does not exist.

    Returns:
        None.
    """
    import os

    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    loss_function = nn.CrossEntropyLoss()

    # torch.save does not create missing directories
    checkpoint_dir = os.path.dirname(model_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    # Best validation loss so far (float("inf") instead of np.Inf, which NumPy 2.0 removed)
    valid_loss_min = float("inf")

    for _ in range(n_epochs):
        # ---- training pass ----
        model.train()
        for data, target in train_loader:
            # clear gradients left over from the previous step
            optimizer.zero_grad()

            # forward pass, loss, backward pass, parameter update
            output = model(data.float())
            loss = loss_function(output, target.long())
            loss.backward()
            optimizer.step()

        # ---- validation pass ----
        model.eval()  # disables dropout
        valid_loss = 0.0
        n_valid_seen = 0
        with torch.no_grad():  # no graph is needed when only measuring loss
            for data, target in valid_loader:
                output = model(data.float())
                loss = loss_function(output, target.long())

                # loss is a batch mean; re-weight by batch size to sum per sample
                valid_loss += loss.item() * data.size(0)
                n_valid_seen += data.size(0)

        # Average over the samples actually visited: with a subset sampler,
        # len(valid_loader.dataset) would count the whole underlying dataset.
        # With no validation data, fall back to inf so the latest weights are kept.
        valid_loss = valid_loss / n_valid_seen if n_valid_seen else float("inf")

        # save model if validation loss has not increased
        if valid_loss <= valid_loss_min:
            torch.save(model.state_dict(), model_path)
            valid_loss_min = valid_loss

### 4. Write the testing function

In [20]:
def test_fnn(labels, model, dataloader, feature_type="No Feature Type Provided"):
    prediction_list = []
    for _, batch in enumerate(dataloader):
        outputs = model(batch)
        _, predicted = torch.max(outputs.data, 1)
        prediction_list.append(predicted.cpu())

    # Calculate and report accuracy
    n = len(predicted)
    correct = 0

    for i in range(n):
        act = labels[i]
        if act == predicted[i]: correct += 1

    accuracy = correct / n
    report_accuracy(accuracy, feature_type, "torch FNN")

# Split data into testing and training subsets

In [21]:
# Split every feature matrix, paired with the shared `labels`, into train/test parts
# using the project helper `split_data` (returns train_x, test_x, train_y, test_y).
# NOTE(review): each call splits independently; whether the four splits pick the same
# rows depends on how split_data is seeded -- confirm in helper_funcs.
bow_train_x, bow_test_x, bow_train_y, bow_test_y = split_data(bow_feature_matrix, labels)
bigram_train_x, bigram_test_x, bigram_train_y, bigram_test_y = split_data(bigram_feature_matrix, labels)
tfidf_train_x, tfidf_test_x, tfidf_train_y, tfidf_test_y = split_data(tfidf_feature_matrix, labels)
w2v_train_x, w2v_test_x, w2v_train_y, w2v_test_y = split_data(w2v_feature_matrix, labels)

# Testing the sklearn Perceptron Model

### 1. Train the model using all the different feature matrices

In [22]:
# Fit a scikit-learn Perceptron on the bag-of-words training split.
bow_model = sklearn_perceptron_train(bow_train_x, bow_train_y)

In [23]:
# Fit a scikit-learn Perceptron on the bigram training split.
bigram_model = sklearn_perceptron_train(bigram_train_x, bigram_train_y)

In [24]:
# Fit a scikit-learn Perceptron on the TF-IDF training split.
tfidf_model = sklearn_perceptron_train(tfidf_train_x, tfidf_train_y)

In [25]:
# Fit a scikit-learn Perceptron on the Word2Vec training split.
w2v_model = sklearn_perceptron_train(w2v_train_x, w2v_train_y)

### 2. Test the model using all the different feature matrices and compare the accuracies

In [26]:
# Score each Perceptron on the held-out split of the feature set it was trained on;
# each call emits one accuracy line via report_accuracy.
sklearn_perceptron_test(bow_test_x, bow_test_y, bow_model, "Bag of Words")
sklearn_perceptron_test(bigram_test_x, bigram_test_y, bigram_model, "Bigram")
sklearn_perceptron_test(tfidf_test_x, tfidf_test_y, tfidf_model, "TF-IDF")
sklearn_perceptron_test(w2v_test_x, w2v_test_y, w2v_model, "Word2Vec")

[Bag of Words]	[sklearn Perceptron]		Accuracy: 46.93%
[Bigram]	[sklearn Perceptron]		Accuracy: 48.26%
[TF-IDF]	[sklearn Perceptron]		Accuracy: 44.85%
[Word2Vec]	[sklearn Perceptron]		Accuracy: 44.35%


# Testing the sklearn SVM Model

### 1. Train the model using all the different feature matrices

In [27]:
# Fit a LinearSVC on the bag-of-words training split.
# Rebinds `bow_model`: the Perceptron trained earlier under this name is no longer reachable.
bow_model = sklearn_svm_train(bow_train_x, bow_train_y)

In [28]:
# Fit a LinearSVC on the bigram training split.
# Rebinds `bigram_model`: the Perceptron trained earlier under this name is no longer reachable.
bigram_model = sklearn_svm_train(bigram_train_x, bigram_train_y)



In [29]:
# Fit a LinearSVC on the TF-IDF training split.
# Rebinds `tfidf_model`: the Perceptron trained earlier under this name is no longer reachable.
tfidf_model = sklearn_svm_train(tfidf_train_x, tfidf_train_y)

In [30]:
# Fit a LinearSVC on the Word2Vec training split.
# Rebinds `w2v_model`: the Perceptron trained earlier under this name is no longer reachable.
w2v_model = sklearn_svm_train(w2v_train_x, w2v_train_y)

### 2. Test the model using all the different feature matrices and compare the accuracies

In [31]:
# Score each SVM on the held-out split of the feature set it was trained on;
# each call emits one accuracy line via report_accuracy.
sklearn_svm_test(bow_test_x, bow_test_y, bow_model, "Bag of Words")
sklearn_svm_test(bigram_test_x, bigram_test_y, bigram_model, "Bigram")
sklearn_svm_test(tfidf_test_x, tfidf_test_y, tfidf_model, "TF-IDF")
sklearn_svm_test(w2v_test_x, w2v_test_y, w2v_model, "Word2Vec")

[Bag of Words]	[sklearn SVM]		Accuracy: 52.99%
[Bigram]	[sklearn SVM]		Accuracy: 51.99%
[TF-IDF]	[sklearn SVM]		Accuracy: 55.32%
[Word2Vec]	[sklearn SVM]		Accuracy: 54.57%


# Testing the torch Feedforward Neural Network Model

In [32]:
# Settings shared by every DataLoader / validation split built in the cells below.
n_workers = 0       # Number of subprocesses used for data loading (0 = load in the main process)
batch_size = 50     # Number of samples per batch
valid_size = 0.2    # Fraction (0-1) of the training set held out for validation

### 1. Train the model using all the different feature matrices

#### 1.1. Bag of Words Features

In [33]:
# Put our training data/labels into a DataFrame
# Wrap the bag-of-words train/test arrays in torch Dataset objects
bow_train_data = TrainDataset(bow_train_x, bow_train_y)
bow_test_data = TestDataset(bow_test_x, bow_test_y)

# Shuffle the training indices and reserve the first `valid_size` fraction for validation.
# NOTE(review): no NumPy seed is set in the cells shown here, so this partition
# presumably changes from run to run -- confirm.
num_train = len(bow_train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
bow_train_idx, bow_valid_idx = indices[split:], indices[:split]

# Define samplers for obtaining training and validation batches
bow_train_sampler = SubsetRandomSampler(bow_train_idx)
bow_valid_sampler = SubsetRandomSampler(bow_valid_idx)

# Prepare data loaders. The train and validation loaders share one dataset and differ
# only in their sampler; the test loader has no sampler, so it keeps dataset order.
bow_train_loader = DataLoader(dataset=bow_train_data, batch_size=batch_size, sampler=bow_train_sampler, num_workers=n_workers)
bow_valid_loader = DataLoader(dataset=bow_train_data, batch_size=batch_size, sampler=bow_valid_sampler, num_workers=n_workers)
bow_test_loader = DataLoader(dataset=bow_test_data, batch_size=batch_size, num_workers=n_workers)

# Create and train our FNN model using the BoW feature vectors
# (rebinds `bow_model`, replacing the SVM trained earlier under this name)
bow_model = Net(bow_feature_matrix.shape[1])
train_fnn(bow_model, bow_train_loader, bow_valid_loader)

#### 1.2. Bigram Features

In [34]:
# Put our training data/labels into a DataFrame
# Wrap the bigram train/test arrays in torch Dataset objects
bigram_train_data = TrainDataset(bigram_train_x, bigram_train_y)
bigram_test_data = TestDataset(bigram_test_x, bigram_test_y)

# Shuffle the training indices and reserve the first `valid_size` fraction for validation.
# NOTE(review): no NumPy seed is set in the cells shown here, so this partition
# presumably changes from run to run -- confirm.
num_train = len(bigram_train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
bigram_train_idx, bigram_valid_idx = indices[split:], indices[:split]

# Define samplers for obtaining training and validation batches
bigram_train_sampler = SubsetRandomSampler(bigram_train_idx)
bigram_valid_sampler = SubsetRandomSampler(bigram_valid_idx)

# Prepare data loaders. The train and validation loaders share one dataset and differ
# only in their sampler; the test loader has no sampler, so it keeps dataset order.
bigram_train_loader = DataLoader(dataset=bigram_train_data, batch_size=batch_size, sampler=bigram_train_sampler, num_workers=n_workers)
bigram_valid_loader = DataLoader(dataset=bigram_train_data, batch_size=batch_size, sampler=bigram_valid_sampler, num_workers=n_workers)
bigram_test_loader = DataLoader(dataset=bigram_test_data, batch_size=batch_size, num_workers=n_workers)

# Create and train our FNN model using the Bigram feature vectors
# (rebinds `bigram_model`, replacing the SVM trained earlier under this name)
bigram_model = Net(bigram_feature_matrix.shape[1])
train_fnn(bigram_model, bigram_train_loader, bigram_valid_loader)

#### 1.3. TF-IDF Features

In [35]:
# Put our training data/labels into a DataFrame
# Wrap the TF-IDF train/test arrays in torch Dataset objects
tfidf_train_data = TrainDataset(tfidf_train_x, tfidf_train_y)
tfidf_test_data = TestDataset(tfidf_test_x, tfidf_test_y)

# Shuffle the training indices and reserve the first `valid_size` fraction for validation.
# NOTE(review): no NumPy seed is set in the cells shown here, so this partition
# presumably changes from run to run -- confirm.
num_train = len(tfidf_train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
tfidf_train_idx, tfidf_valid_idx = indices[split:], indices[:split]

# Define samplers for obtaining training and validation batches
tfidf_train_sampler = SubsetRandomSampler(tfidf_train_idx)
tfidf_valid_sampler = SubsetRandomSampler(tfidf_valid_idx)

# Prepare data loaders. The train and validation loaders share one dataset and differ
# only in their sampler; the test loader has no sampler, so it keeps dataset order.
tfidf_train_loader = DataLoader(dataset=tfidf_train_data, batch_size=batch_size, sampler=tfidf_train_sampler, num_workers=n_workers)
tfidf_valid_loader = DataLoader(dataset=tfidf_train_data, batch_size=batch_size, sampler=tfidf_valid_sampler, num_workers=n_workers)
tfidf_test_loader = DataLoader(dataset=tfidf_test_data, batch_size=batch_size, num_workers=n_workers)

# Create and train our FNN model using the TF-IDF feature vectors
# (rebinds `tfidf_model`, replacing the SVM trained earlier under this name)
tfidf_model = Net(tfidf_feature_matrix.shape[1])
train_fnn(tfidf_model, tfidf_train_loader, tfidf_valid_loader)

#### 1.4. Word2Vec Features

In [36]:
# Put our training data/labels into a DataFrame
# Wrap the Word2Vec train/test arrays in torch Dataset objects
w2v_train_data = TrainDataset(w2v_train_x, w2v_train_y)
w2v_test_data = TestDataset(w2v_test_x, w2v_test_y)

# Shuffle the training indices and reserve the first `valid_size` fraction for validation.
# NOTE(review): no NumPy seed is set in the cells shown here, so this partition
# presumably changes from run to run -- confirm.
num_train = len(w2v_train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
w2v_train_idx, w2v_valid_idx = indices[split:], indices[:split]

# Define samplers for obtaining training and validation batches
w2v_train_sampler = SubsetRandomSampler(w2v_train_idx)
w2v_valid_sampler = SubsetRandomSampler(w2v_valid_idx)

# Prepare data loaders. The train and validation loaders share one dataset and differ
# only in their sampler; the test loader has no sampler, so it keeps dataset order.
w2v_train_loader = DataLoader(dataset=w2v_train_data, batch_size=batch_size, sampler=w2v_train_sampler, num_workers=n_workers)
w2v_valid_loader = DataLoader(dataset=w2v_train_data, batch_size=batch_size, sampler=w2v_valid_sampler, num_workers=n_workers)
w2v_test_loader = DataLoader(dataset=w2v_test_data, batch_size=batch_size, num_workers=n_workers)

# Create and train our FNN model using the Word2Vec feature vectors
# (rebinds `w2v_model`, replacing the SVM trained earlier under this name)
w2v_model = Net(w2v_feature_matrix.shape[1])
train_fnn(w2v_model, w2v_train_loader, w2v_valid_loader)

### 2. Test the model using all the different feature matrices and compare the accuracies

In [37]:
# Score each trained FNN on the test loader of its own feature set; each call emits
# one accuracy line via report_accuracy.
# NOTE(review): these are the in-memory models as left by train_fnn; the checkpoint
# that train_fnn writes to disk is not reloaded anywhere in this cell.
test_fnn(bow_test_y, bow_model, bow_test_loader, feature_type="Bag of Words")
test_fnn(bigram_test_y, bigram_model, bigram_test_loader, feature_type="Bigram")
test_fnn(tfidf_test_y, tfidf_model, tfidf_test_loader, feature_type="TF-IDF")
test_fnn(w2v_test_y, w2v_model, w2v_test_loader, feature_type="Word2Vec")

[Bag of Words]	[torch FNN]		Accuracy: 0.00%
[Bigram]	[torch FNN]		Accuracy: 25.00%
[TF-IDF]	[torch FNN]		Accuracy: 25.00%
[Word2Vec]	[torch FNN]		Accuracy: 50.00%
