In [23]:
import csv
import io

import pandas as pd
from sklearn import svm

class SVM_Classifier:
    """Small wrapper around scikit-learn's ``svm.LinearSVC``.

    ``train`` fits a brand-new ``LinearSVC`` each time it is called and
    ``classify`` predicts with the most recently fitted one.
    """

    def __init__(self):
        # No model exists until train() has completed successfully.
        self.lin_clf = None

    def train(self, training_data, class_labels):
        """Fit a new linear SVM and keep it for later ``classify`` calls.

        Args:
            training_data: Feature vectors, one per sample, passed straight
                to ``LinearSVC.fit``.
            class_labels: Target label for each sample, in the same order as
                ``training_data``.

        Returns:
            True once fitting has completed.
        """
        model = svm.LinearSVC(dual="auto")
        model.fit(training_data, class_labels)
        # Publish the model only after fit() succeeded, so a failed training
        # run never leaves an unfitted estimator behind.
        self.lin_clf = model
        return True

    def classify(self, test_data):
        """Predict a class label for every feature vector in ``test_data``.

        Args:
            test_data: A batch (sequence) of feature vectors, passed straight
                to the fitted model's ``predict``.

        Returns:
            A plain ``list`` with one predicted label per input vector.

        Raises:
            AttributeError: If called before ``train``. The exception type is
                the same one the unguarded ``None.predict`` call produced;
                only the message is more helpful.
        """
        if self.lin_clf is None:
            raise AttributeError(
                "SVM_Classifier.classify() called before train(); "
                "train the classifier first"
            )
        return list(self.lin_clf.predict(test_data))
    
class Read_CSV_Pandas:
    """Loads a CSV file through pandas and returns it as rows of strings."""

    def __init__(self):
        pass

    def readCsv(self, data_location):
        """Read the CSV at ``data_location`` into a list of string rows.

        The file is parsed by pandas with its first column as the index and
        then serialised back to CSV text, so every cell comes back as the
        string pandas writes for it. pandas treats the first line of the file
        as the header, and that line is returned as the first row.

        Args:
            data_location: Path (or any other source ``pd.read_csv`` accepts)
                of the CSV file to load.

        Returns:
            A list of rows, each row being a list of field strings.
        """
        # Honour the caller's path instead of a hard-coded file name.
        frame = pd.read_csv(data_location, index_col=0)
        csv_text = frame.to_csv()
        # Parse with the csv module rather than splitting on "," and "\n" by
        # hand, so quoted fields that contain commas or line breaks survive.
        return list(csv.reader(io.StringIO(csv_text, newline="")))


In [24]:
# Read the dummy ATC dataset into a list of rows (each row is a list of strings)
data = Read_CSV_Pandas()
atc_dummy_data = data.readCsv("dummy_data_atc.csv")

# Print every row as a sanity check; each row has the form:
# [utterance, class_label]
for row in atc_dummy_data:
    print(row)


['ground ready to taxi', 'ready to taxi']
['taxi via to runway', 'taxi to runway']
['tower holding short ready to takeoff', 'holding short runway']
['takeoff runway', 'takeoff confirmed']
['tower midfield downwind for runway', 'pattern location']
['taxi via to ramp', 'taxi to ramp']


In [25]:
def tokenizer(utterance):
    """Split an utterance into word tokens.

    Splits on any run of whitespace, so repeated, leading or trailing spaces
    (and tabs or newlines) never produce empty-string tokens.

    Args:
        utterance: The text to tokenize.

    Returns:
        A list of word strings; an empty or all-whitespace utterance gives
        an empty list.
    """
    return utterance.split()

def make_ngram_vector_dict(list_of_utterances):
    """Build a vocabulary map from each distinct word to a feature index.

    Despite the name, the entries are single words as produced by
    ``tokenizer``. Indices are assigned in order of first appearance,
    starting at 0.

    Args:
        list_of_utterances: Iterable of utterance strings.

    Returns:
        A dict mapping each distinct word to its integer index.
    """
    vector_dict = {}
    for utterance in list_of_utterances:
        for word in tokenizer(utterance):
            # Dict membership is O(1); the next free index is simply the
            # number of words recorded so far.
            if word not in vector_dict:
                vector_dict[word] = len(vector_dict)
    return vector_dict

import json

# Build the word -> feature-index map from the utterance column (row[0])
all_utterances = [record[0] for record in atc_dummy_data]
feature_vector_map = make_ngram_vector_dict(all_utterances)

# Show the map as indented JSON
print(json.dumps(feature_vector_map, indent=2))

{
  "ground": 0,
  "ready": 1,
  "to": 2,
  "taxi": 3,
  "via": 4,
  "runway": 5,
  "tower": 6,
  "holding": 7,
  "short": 8,
  "takeoff": 9,
  "midfield": 10,
  "downwind": 11,
  "for": 12,
  "ramp": 13
}


In [26]:
def convert_utterance_to_features(vector_map, utterance):
    """Encode an utterance as a binary bag-of-words feature vector.

    Args:
        vector_map: Mapping from word to its index in the feature vector.
        utterance: The text to encode; it is split with ``tokenizer``.

    Returns:
        A list of ``len(vector_map)`` integers, holding 1 at the index of
        every known word that occurs in the utterance and 0 elsewhere.
        Words missing from ``vector_map`` are skipped.

    Raises:
        IndexError: If ``vector_map`` holds an index outside the vector's
            range (a malformed map is reported rather than silently ignored).
    """
    output_features = [0] * len(vector_map)  # start with all zeros
    for word in tokenizer(utterance):
        # Skip only out-of-vocabulary words; any other failure should
        # surface instead of being swallowed by a bare except.
        if word in vector_map:
            output_features[vector_map[word]] = 1
    return output_features

def add_utterance_as_features_to_data(vector_map, data_array):
    """Append a feature vector to every row of ``data_array``, in place.

    Each row's first element is taken as the utterance text; its encoding
    from ``convert_utterance_to_features`` is appended to the end of that
    same row.

    Args:
        vector_map: Word-to-index map handed to the encoder.
        data_array: List of rows (lists) whose element 0 is the utterance.

    Returns:
        The same ``data_array`` object, with its rows mutated.
    """
    for record in data_array:
        encoded = convert_utterance_to_features(vector_map, record[0])
        record.append(encoded)
    return data_array

# Append each utterance's feature vector to its row, so every row becomes
# [utterance, class_label, feature_vector].
# NOTE: the rows are modified in place, so re-running this cell appends one
# more feature vector to every row.
atc_dummy_data = add_utterance_as_features_to_data(feature_vector_map, atc_dummy_data)

# Print every row to inspect the result
for row in atc_dummy_data:
    print(row)
    

['ground ready to taxi', 'ready to taxi', [1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
['taxi via to runway', 'taxi to runway', [0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]]
['tower holding short ready to takeoff', 'holding short runway', [0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0]]
['takeoff runway', 'takeoff confirmed', [0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0]]
['tower midfield downwind for runway', 'pattern location', [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0]]
['taxi via to ramp', 'taxi to ramp', [0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1]]


In [27]:
# Fit the SVM on the per-row feature vectors and their class labels

# In each row, element 2 is the feature vector and element 1 the class label
training_data = [record[2] for record in atc_dummy_data]
training_labels = [record[1] for record in atc_dummy_data]

# Create the classifier wrapper
atc_classifier = SVM_Classifier()

# Fit it; as the cell's last expression, train()'s return value is displayed
atc_classifier.train(training_data, training_labels)

True

In [28]:
# Classify a sample utterance with the trained model
test_utterance = "approaching right midfield downwind"

# Encode it with the same vocabulary map that was used for training
test_utterance_features = convert_utterance_to_features(feature_vector_map, test_utterance)

# classify() takes a batch and returns a list of labels, so wrap the single
# vector in a list and take the only element of the result
test_utterance_class = atc_classifier.classify([test_utterance_features])[0]

print("Input utterance:\n\t", test_utterance, sep="")
print("Input utterance as feature vectors and weights:\n\t", str(test_utterance_features), sep="")
print("That was classified as:\n\t", str(test_utterance_class), sep="")

Input utterance:
	approaching right midfield downwind
Input utterance as feature vectors and weights:
	[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0]
That was classified as:
	pattern location
