In [1]:
import pandas as pd
import numpy as np
import spacy

In [2]:
df = pd.read_csv('cleaned_data.csv')
df = df.dropna()


In [3]:
df.shape

(85, 2)

In [4]:
df['Resolved By'].value_counts()

Brendan Gannon            29
Raymond Ransome           17
Bridget Goodwin           16
System User                7
Kevin Saunders             6
Nikolai Nesteroke          4
Micaela Chaves-Caetano     1
Tom Blake                  1
Mia Grace Naglieri         1
Sai Pavan Veluguri         1
Paul Chauvet               1
David Dixon                1
Name: Resolved By, dtype: int64

In [5]:
nlp = spacy.load("en_core_web_lg")

In [6]:
#df['label_num'] = df['Resolved By'].map({'Brendan Gannon':0, 'Raymond Ransome': 1, 'Bridget Goodwin':2,
#                                        'Kevin Saunders':3,'Nikolai Nesteroke':4, 'Micaela Chaves-Caetano':5,
#                                        'Tom Blake':6, 'Mia Grace Naglieri':7, 'Sai Pavan Veluguri':8,
#                                        'Paul Chauvet':9, 'David Dixon':10, 'System User':11 })
df.head()

Unnamed: 0,Resolved By,Title_Desc
0,Bridget Goodwin,CSB027 wants admin rights. This is about my of...
1,Brendan Gannon,Desktop version of Office 365 not working prop...
2,Raymond Ransome,Verify Certificate????. I have the following m...
5,Micaela Chaves-Caetano,"Re: The sound setting issue. Hi, \n\n \n \n\n\..."
6,Raymond Ransome,"Re: The sound setting issue. Hi, \n \nIf Ray i..."


In [7]:
df['Resolved By'].unique()

array(['Bridget Goodwin', 'Brendan Gannon', 'Raymond Ransome',
       'Micaela Chaves-Caetano', 'Kevin Saunders', 'Tom Blake',
       'System User', 'Mia Grace Naglieri', 'Sai Pavan Veluguri',
       'Paul Chauvet', 'David Dixon', 'Nikolai Nesteroke'], dtype=object)

In [8]:
df['Resolved By'] = "__label__" + df['Resolved By'].astype(str)
df.head()

Unnamed: 0,Resolved By,Title_Desc
0,__label__Bridget Goodwin,CSB027 wants admin rights. This is about my of...
1,__label__Brendan Gannon,Desktop version of Office 365 not working prop...
2,__label__Raymond Ransome,Verify Certificate????. I have the following m...
5,__label__Micaela Chaves-Caetano,"Re: The sound setting issue. Hi, \n\n \n \n\n\..."
6,__label__Raymond Ransome,"Re: The sound setting issue. Hi, \n \nIf Ray i..."


In [9]:
df['category_desc'] = df['Resolved By'] + df['Title_Desc']
df.head()

Unnamed: 0,Resolved By,Title_Desc,category_desc
0,__label__Bridget Goodwin,CSB027 wants admin rights. This is about my of...,__label__Bridget GoodwinCSB027 wants admin rig...
1,__label__Brendan Gannon,Desktop version of Office 365 not working prop...,__label__Brendan GannonDesktop version of Offi...
2,__label__Raymond Ransome,Verify Certificate????. I have the following m...,__label__Raymond RansomeVerify Certificate????...
5,__label__Micaela Chaves-Caetano,"Re: The sound setting issue. Hi, \n\n \n \n\n\...",__label__Micaela Chaves-CaetanoRe: The sound s...
6,__label__Raymond Ransome,"Re: The sound setting issue. Hi, \n \nIf Ray i...",__label__Raymond RansomeRe: The sound setting ...


In [10]:
#preprocessing
#Remove punctuation
#Remove extra space
#Make the entire sentence lower case

import re

def preprocess(text):
    text = re.sub(r'[^\w\s\']',' ', text)
    text = re.sub(' +', ' ', text)
    return text.strip().lower() 

In [11]:
df['category_desc'] = df['category_desc'].map(preprocess)
df.head()

Unnamed: 0,Resolved By,Title_Desc,category_desc
0,__label__Bridget Goodwin,CSB027 wants admin rights. This is about my of...,__label__bridget goodwincsb027 wants admin rig...
1,__label__Brendan Gannon,Desktop version of Office 365 not working prop...,__label__brendan gannondesktop version of offi...
2,__label__Raymond Ransome,Verify Certificate????. I have the following m...,__label__raymond ransomeverify certificate i h...
5,__label__Micaela Chaves-Caetano,"Re: The sound setting issue. Hi, \n\n \n \n\n\...",__label__micaela chaves caetanore the sound se...
6,__label__Raymond Ransome,"Re: The sound setting issue. Hi, \n \nIf Ray i...",__label__raymond ransomere the sound setting i...


In [12]:
#Training
from sklearn.model_selection import train_test_split

train, test = train_test_split(df, test_size=0.2)

In [13]:
train.shape, test.shape

((68, 3), (17, 3))

In [14]:
#Train test split
train.to_csv("tickets.train", columns = ["category_desc"], index = False, header = False)
test.to_csv("tickets.test", columns = ["category_desc"], index = False, header = False)

In [15]:
#Training and evaluating model
import fasttext

#First parameter (10) is test size. Second and third parameters are precision and recall respectively.
#really bad(10,0.3,0.3)
model = fasttext.train_supervised(input="tickets.train")
model.test("tickets.test")

Read 0M words
Number of words:  1479
Number of labels: 5
Progress: 100.1% words/sec/thread:  119000 lr: -0.000051 avg.loss:  1.613351 ETA:   0h 0m 0sProgress: 100.0% words/sec/thread:  118813 lr:  0.000000 avg.loss:  1.613351 ETA:   0h 0m 0s


(9, 0.3333333333333333, 0.3333333333333333)

In [16]:
#Predictions
model.predict("I am having wifi problems")

(('__label__brendan',), array([0.20001437]))

In [17]:
model.predict("projector not working")

(('__label__brendan',), array([0.20002899]))

In [18]:
model.predict("adobe is not working")

(('__label__brendan',), array([0.20003112]))

In [19]:
model.predict("computer speakers are not working")

(('__label__raymond',), array([0.20002508]))

In [20]:
model.predict("my computer and computer screen will not turn on")

(('__label__raymond',), array([0.20002508]))

In [21]:
model.get_nearest_neighbors("Speakers")

[(0.0, 'the'),
 (0.0, 'and'),
 (0.0, 'a'),
 (0.0, 'new'),
 (0.0, 'in'),
 (0.0, 'on'),
 (0.0, 'gannonno'),
 (0.0, 'talk'),
 (0.0, 'case'),
 (0.0, '3')]

In [22]:
model.predict("hello   my name is kellian collins and my office is in old main room 218a   my computer and computer screen will not turn on the power strip that everything is plugged into also will not turn on   you have my permission to go into my office at any time to fix the problem")

(('__label__brendan',), array([0.20002766]))

In [23]:
model.predict("Hello, I am having trouble logging into the school's website. ")

(('__label__brendan',), array([0.20002355]))

In [24]:
model.predict("The speakers in my classroom are not working")

(('__label__brendan',), array([0.2000307]))

In [25]:
model.predict("raymond can you help me with my speakers")

(('__label__brendan',), array([0.2000262]))

In [26]:
model.predict("I need help logging in Bridget")

(('__label__brendan',), array([0.20002192]))