In [1]:
import numpy as np 
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
import re
from nltk.corpus import stopwords
import nltk
from nltk import pos_tag
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
import enum

STOPWORDS = set(stopwords.words('english'))
CONTEXTUAL_STOPWORDS = ['&gt;', 'hello', 'said', 'regards', 'hi', 'all', 'please', 'assist','kindly','help','thx','thank','thankyou','you', 'thu', 'fwd', 'forwarded', 'message', 'iappsasiacom', 'date', 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec', 'gmail', 'gmailcom', 'com', 'tell', 'am', 'pm', 'subject', 'query', 'mon', 'tue', 'wed', 'thur', 'fri', 'sat', 'sun']
STOPWORDS.update(CONTEXTUAL_STOPWORDS)
REPLACE_BY_SPACE_RE = re.compile('[/(){}\[\]\|@,;]')
BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')
wordnet_lemmatizer = WordNetLemmatizer()
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

def lemmatize(x):
    dirty = word_tokenize(x)
    tokens = []
    for word in dirty:
        if word.strip('.') == '':
            pass
        else:
            tokens.append(word.strip('.'))
    tokens = pos_tag(tokens)
    lemms = ' '.join(wordnet_lemmatizer.lemmatize(key.lower()) for key, value in  tokens if value != 'NNP') #getting rid of proper nouns
    return lemms

def remove_confidentiality_notice(after_split):
    cleaned = []
    counter = 7
    found =  False
    for each in after_split:
        if '*CONFIDENTIALITY NOTICE' in each:
            found = True
            counter -= 1
        else:
            if found:
                counter -= 1
                if counter == 0:
                    found = False
                else:
                    continue
            else:
                cleaned.append(each)
    return cleaned

def remove_fixed_address(after_split):
    cleaned = []
    counter = 5
    found =  False
    for each in after_split:
        if 'ActiveSG Technical Helpdesk' in each:
            found = True
            counter -= 1
        else:
            if found:
                counter -= 1
                if counter == 0:
                    found = False
                else:
                    continue
            else:
                cleaned.append(each)
    return cleaned

def clean_text(text):
    temp = text.split("\r")
    temp = remove_confidentiality_notice(temp)
    temp = remove_fixed_address(temp)
    text = ''.join(word for word in temp if 'From:' not in word and 'To:' not in word and '<' not in word and 'Forwarded' not in word and 
                  'Date' not in word and 'Subject' not in word and 'Cc' not in word
                   and 'n[O]' not in word and '[F]' not in word and 'Sent:' not in word and '@' not in word)
    text = text.lower()
    text = text.replace("\n", " ")
    text = REPLACE_BY_SPACE_RE.sub(' ', text)
    text = BAD_SYMBOLS_RE.sub('', text)
    text = lemmatize(text)
    text = ' '.join(word for word in text.split() if word not in STOPWORDS)
    text = ''.join([i for i in text if not i.isdigit()])
    text = " ".join(text.split())
    return text


In [28]:
df = pd.read_csv('train.csv')
df

Unnamed: 0,content,issue
0,Hi Erik and Kelvin\nPlease help check why pare...,Admin Panel
1,Please help check why parental information is ...,Admin Panel
2,"Hi All,\nKindly assist in checking if user hav...",Wallet
3,"Hi All,\nPlease assist.\nUser have a change in...",Wallet
4,"Hi iApps colleagues,\nplease find the feedback...",Booking
5,"Hi Jun Jie,\n\nThanks for the fast reply.\nI f...",Booking
6,"Hi All,\nThis case needs investigation\n- Issu...",Nexus
7,Hi Azimah\n\nUnder 11 Boys single will not sho...,Nexus
8,Hi Helpdesk\nThere are 83 members in the attac...,Service Request
9,"Hi All,\nThis case requires Ontime creation.\n...",Service Request


In [29]:
df['cleaned_content'] = df['content'].apply(clean_text)

In [30]:
df

Unnamed: 0,content,issue,cleaned_content
0,Hi Erik and Kelvin\nPlease help check why pare...,Admin Panel,erik kelvin check parental information missing...
1,Please help check why parental information is ...,Admin Panel,check parental information missing month month...
2,"Hi All,\nKindly assist in checking if user hav...",Wallet,
3,"Hi All,\nPlease assist.\nUser have a change in...",Wallet,user change citizenship ha also update verifie...
4,"Hi iApps colleagues,\nplease find the feedback...",Booking,iapps colleague find feedback writer lag exper...
5,"Hi Jun Jie,\n\nThanks for the fast reply.\nI f...",Booking,jie thanks fast reply wish point system traffi...
6,"Hi All,\nThis case needs investigation\n- Issu...",Nexus,case need investigation issue member able see ...
7,Hi Azimah\n\nUnder 11 Boys single will not sho...,Nexus,azimah boy single show ha reached maximum capa...
8,Hi Helpdesk\nThere are 83 members in the attac...,Service Request,helpdesk member attached sheet extend activesg...
9,"Hi All,\nThis case requires Ontime creation.\n...",Service Request,case requires ontime creation issue credit rei...


In [31]:
# The maximum number of words to be used. (most frequent)
MAX_NB_WORDS = 50000
# Max number of words in each complaint.
MAX_SEQUENCE_LENGTH = 250
# This is fixed.
EMBEDDING_DIM = 100

tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?@[\]^_`{|}~', lower=True)
tokenizer.fit_on_texts(df['cleaned_content'].values)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

Found 358 unique tokens.


In [32]:
X = tokenizer.texts_to_sequences(df['cleaned_content'].values)
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', X.shape)

Shape of data tensor: (25, 250)


In [33]:
y = pd.get_dummies(df['issue'])
Y = y.values
print('Shape of label tensor:', Y.shape)

Shape of label tensor: (25, 12)


In [34]:
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.50, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)

(12, 250) (12, 12)
(13, 250) (13, 12)


In [38]:
model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(12, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 250, 100)          5000000   
_________________________________________________________________
spatial_dropout1d_2 (Spatial (None, 250, 100)          0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_2 (Dense)              (None, 12)                1212      
Total params: 5,081,612
Trainable params: 5,081,612
Non-trainable params: 0
_________________________________________________________________
None


In [39]:
epochs = 30
batch_size = 64

history = model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size,validation_split=0.1,callbacks=[EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001)])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30


In [40]:
accr = model.evaluate(X_test,Y_test)
print('Test set\n  Loss: {:0.3f}\n  Accuracy: {:0.3f}'.format(accr[0],accr[1]))

Test set
  Loss: 2.864
  Accuracy: 0.000


In [17]:
new_case = ['I am unable to make my booking for activeSG.']
seq = tokenizer.texts_to_sequences(new_case)
padded = pad_sequences(seq, maxlen=MAX_SEQUENCE_LENGTH)
pred = model.predict(padded)
labels = list(sorted(set(df.issue)))
print(pred, labels[np.argmax(pred)])

[[0.02451572 0.06639016 0.07264177 0.0201415  0.02257665 0.02414218
  0.04334201 0.0851763  0.04125025 0.220555   0.08567125 0.04701773
  0.24657941]] Schools


test = '---------- Forwarded message ---------\r\nFrom: <wenfeng@iappsasia.com>\r\nDate: Thu, Mar 25, 2021 at 4:14 PM\r\nSubject: Fwd: query\r\nTo: <wenfeng@iappsasia.com>\r\n\r\n\r\nhello developer, the customer said blah blah. help\r\n\r\n\r\n\r\n---------- Forwarded message ---------\r\nFrom: lim wilfred <wilfred.lim.wf@gmail.com>\r\nDate: Tue, Mar 23, 2021 at 5:25 PM\r\nSubject: Re: query\r\nTo: Wen Feng Lim <wenfeng@iappsasia.com>\r\n\r\n\r\nThank you for your assistance.\r\n\r\nOn Tue, Mar 23, 2021 at 5:25 PM Wen Feng Lim <wenfeng@iappsasia.com> wrote:\r\n\r\n> How can I help you?\r\n>\r\n> please tell me more\r\n>\r\n> regards,\r\n>\r\n> helpdesk\r\n>\r\n> On Tue, Mar 23, 2021 at 5:24 PM lim wilfred <wilfred.lim.wf@gmail.com>\r\n> wrote:\r\n>\r\n>> hello, i cannot log in\r\n>>\r\n>>\r\n>> help\r\n>>\r\n>>\r\n>>\r\n>> help\r\n>>\r\n>\r\n'

In [18]:
s = '---------- Forwarded message ---------\r\nFrom: Phoebe <phoebexie@iappsasia.com>\r\nDate: Fri, Mar 19, 2021 at 9:32 AM\r\nSubject: Fwd: FW: [CASE:217438] FW: Swimsafer Test Booking\r\nTo: <samuel@iappsasia.com>\r\n\r\n\r\n\r\nWarm Regards,\r\nPhoebe Xie\r\nProduct Director\r\nIAPPS Pte Ltd\r\n3 Fusionopolis Way, Symbiosis #13-25 S(138633)\r\n[O] 6463 1795   [F] 6778 5300\r\nWebsite: www.iappsasia.com\r\nFacebook: www.facebook.com/iappsasia\r\nYoutube: www.youtube.com/user/iAPPSasia\r\n\r\n\r\n\r\n\r\n---------- Forwarded message ---------\r\nFrom: Phoebe <phoebexie@iappsasia.com>\r\nDate: Fri, Mar 19, 2021 at 9:31 AM\r\nSubject: Re: FW: [CASE:217438] FW: Swimsafer Test Booking\r\nTo: Active SG Helpdesk <helpme@iappsasia.com>\r\nCc: <L2@iappsasia.com>\r\n\r\n\r\nHi Helpdesk\r\n\r\nPleasae provide us the screenshot or error message shown. Also, please\r\nexplain the steps taken by user and hence causing user to say "unable to\r\nbook"\r\n\r\nThanks\r\n\r\nWarm Regards,\r\nPhoebe Xie\r\nProduct Director\r\nIAPPS Pte Ltd\r\n3 Fusionopolis Way, Symbiosis #13-25 S(138633)\r\n[O] 6463 1795   [F] 6778 5300\r\nWebsite: www.iappsasia.com\r\nFacebook: www.facebook.com/iappsasia\r\nYoutube: www.youtube.com/user/iAPPSasia\r\n\r\n\r\n\r\n\r\nOn Thu, Mar 18, 2021 at 8:54 AM Active SG Helpdesk <helpme@iappsasia.com>\r\nwrote:\r\n\r\n> Hi All,\r\n>\r\n> Able to assist on the following issue as the earliest assessment the\r\n> instructor is able to book is in May and not in March/April.\r\n>\r\n> Thank you!\r\n>\r\n> Warmest Regards,\r\n> Yuan Ting\r\n>\r\n> ActiveSG Technical Helpdesk\r\n> iAPPS Pte Ltd\r\n> 3 Fusionopolis Way, #13-24/25\r\n> Symbiosis, Singapore 138633\r\n> Helpline: 65941388\r\n>\r\n>\r\n> ------------------------------\r\n>\r\n> *From:* Muhammad_FARRIS_from.TP@sport.gov.sg\r\n> *Date:* 17/03/2021 04:28pm\r\n> *To:* helpme@iappsasia.com\r\n> *Cc:* Jayme_JACOB@sport.gov.sg\r\n> *Subject:* [CASE:217438] FW: Swimsafer Test Booking\r\n>\r\n> Hi Helpdesk,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Could you advise me on the issue below. The instructor below is unable to\r\n> book for any assessments within March and April. He claims that the system\r\n> only allows for him to book in May at the earliest. Are you able to clarify\r\n> on the reason why?\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Warm regards,\r\n> Farris\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> *From:* Alex Loh <alexloh77@hotmail.com>\r\n> *Sent:* Wednesday, 17 March 2021 9:47 AM\r\n> *To:* Jayme JACOB (SPORT) <Jayme_JACOB@sport.gov.sg>; Simon TAN (SPORT) <\r\n> Simon_TAN@sport.gov.sg>\r\n> *Cc:* Muhammad FARRIS from.TP (SPORT) <\r\n> Muhammad_FARRIS_from.TP@sport.gov.sg>\r\n> *Subject:* Re: Swimsafer Test Booking\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Hi,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> I had checked with all the coaches from Jurong East complex sat 3pm to\r\n> 6pm, non of them had booked any swimsafer test for the month of mar and\r\n> apr. If no test booking had been made why I am unable to book test?\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Thanks!!!\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> ------------------------------\r\n>\r\n> *From:* Alex Loh <alexloh77@hotmail.com>\r\n> *Sent:* Tuesday, 16 March 2021 10:46 AM\r\n> *To:* Jayme JACOB (SPORT) <Jayme_JACOB@sport.gov.sg>; Simon TAN (SPORT) <\r\n> Simon_TAN@sport.gov.sg>\r\n> *Cc:* Muhammad FARRIS from.TP (SPORT) <\r\n> Muhammad_FARRIS_from.TP@sport.gov.sg>\r\n> *Subject:* Re: Swimsafer Test Booking\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Hi,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> I had checked with all the coaches from Jurong East complex sat 3pm to\r\n> 6pm, non of them had booked any swimsafer test for the month of mar and\r\n> apr. If no test booking had been made why I am unable to book test?\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Thanks!!!\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> On 16 Mar 2021 8:41 am, Alex Loh <alexloh77@hotmail.com> wrote:\r\n>\r\n>\r\n>\r\n> Hi,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> I had checked with all the coaches from Jurong East complex sat 3pm to\r\n> 6pm, non of them had booked any swimsafer test for the month of mar and\r\n> apr. If no test booking had been made why I am unable to book test?\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Thanks!!!\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> ------------------------------\r\n>\r\n> *From:* Jayme JACOB (SPORT) <Jayme_JACOB@sport.gov.sg>\r\n> *Sent:* Monday, 8 March 2021 12:07 PM\r\n> *To:* Alex Loh <alexloh77@hotmail.com>; Simon TAN (SPORT) <\r\n> Simon_TAN@sport.gov.sg>\r\n> *Cc:* Muhammad FARRIS from.TP (SPORT) <\r\n> Muhammad_FARRIS_from.TP@sport.gov.sg>\r\n> *Subject:* RE: Swimsafer Test Booking\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Hi Alex,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> This means that the slots for assessments during March and April have been\r\n> taken up.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Please choose the next earliest available date.\r\n>\r\n>\r\n>\r\n> *Kind Regards*,\r\n>\r\n>\r\n>\r\n> *Jayme Jacob*\r\n> Foundation Sports Executive\r\n> ActiveSG | Sport Singapore\r\n> jayme_jacob@sport.gov.sg | 3 Stadium Drive, Singapore 397630\r\n>\r\n>\r\n>\r\n> [image: Sport Singapore] <https://www.sportsingapore.gov.sg/>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> *CONFIDENTIALITY NOTICE:* This email may contain privileged and\r\n> confidential information. If you have been sent or have received this email\r\n> by mistake, please notify the sender and delete the email immediately. You\r\n> must not copy, use or disseminate the information contained in this email.\r\n> Nothing in this email is intended to nor shall it constitute any offer or\r\n> acceptance resulting in binding agreement, unless expressly stated\r\n> otherwise.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> *From:* Alex Loh <alexloh77@hotmail.com>\r\n> *Sent:* Monday, 8 March 2021 11:54 AM\r\n> *To:* Jayme JACOB (SPORT) <Jayme_JACOB@sport.gov.sg>; Simon TAN (SPORT) <\r\n> Simon_TAN@sport.gov.sg>\r\n> *Cc:* Muhammad FARRIS from.TP (SPORT) <\r\n> Muhammad_FARRIS_from.TP@sport.gov.sg>\r\n> *Subject:* Re: Swimsafer Test Booking\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Hi,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> I just tried again. Mar and Apr unable to book for test, earliest is may.\r\n> May I know what is the reason?\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Thanks!!!\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> ------------------------------\r\n>\r\n> *From:* Jayme JACOB (SPORT) <Jayme_JACOB@sport.gov.sg>\r\n> *Sent:* Monday, 8 March 2021 11:31 AM\r\n> *To:* Alex Loh <alexloh77@hotmail.com>; Simon TAN (SPORT) <\r\n> Simon_TAN@sport.gov.sg>\r\n> *Cc:* Muhammad FARRIS from.TP (SPORT) <\r\n> Muhammad_FARRIS_from.TP@sport.gov.sg>\r\n> *Subject:* RE: Swimsafer Test Booking\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Hi Alex,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Per the SwimSafer booking system, bookings can only be made 2 weeks prior\r\n> to the date of the test. This is to prevent double bookings.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> As such, the system was not letting you make a booking as you were trying\r\n> to make one for the 13th of March on the 3rd of March.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Do schedule for a later date that the system will allow.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Thank you.\r\n>\r\n>\r\n>\r\n> *Kind Regards*,\r\n>\r\n>\r\n>\r\n> *Jayme Jacob*\r\n> Foundation Sports Executive\r\n> ActiveSG | Sport Singapore\r\n> jayme_jacob@sport.gov.sg | 3 Stadium Drive, Singapore 397630\r\n>\r\n>\r\n>\r\n> [image: Sport Singapore] <https://www.sportsingapore.gov.sg/>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> *CONFIDENTIALITY NOTICE:* This email may contain privileged and\r\n> confidential information. If you have been sent or have received this email\r\n> by mistake, please notify the sender and delete the email immediately. You\r\n> must not copy, use or disseminate the information contained in this email.\r\n> Nothing in this email is intended to nor shall it constitute any offer or\r\n> acceptance resulting in binding agreement, unless expressly stated\r\n> otherwise.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> *From:* Alex Loh <alexloh77@hotmail.com>\r\n> *Sent:* Saturday, 6 March 2021 10:00 PM\r\n> *To:* Simon TAN (SPORT) <Simon_TAN@sport.gov.sg>\r\n> *Cc:* Jayme JACOB (SPORT) <Jayme_JACOB@sport.gov.sg>; Muhammad FARRIS\r\n> from.TP (SPORT) <Muhammad_FARRIS_from.TP@sport.gov.sg>\r\n> *Subject:* Re: Swimsafer Test Booking\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Hi,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>  I had been waiting for your colleague reply, are they still working at\r\n> sport singapore or quit already?\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Thanks!!!\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> ------------------------------\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> *From:* Simon TAN (SPORT) <Simon_TAN@sport.gov.sg>\r\n> *Sent:* Wednesday, 3 March 2021 10:56 AM\r\n> *To:* Alex Loh <alexloh77@hotmail.com>\r\n> *Cc:* Jayme JACOB (SPORT) <Jayme_JACOB@sport.gov.sg>; Muhammad FARRIS\r\n> from.TP (SPORT) <Muhammad_FARRIS_from.TP@sport.gov.sg>\r\n> *Subject:* RE: Swimsafer Test Booking\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Hi Alex\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> My colleague Jayme and Farris will assist you.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Thanks\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> *Simon Tan*\r\n> Team Lead\r\n> Active Ageing Development | Active Masters  | ActiveSG\r\n> simon_tan@sport.gov.sg | T.65 6500 5172 | F. 65 63461842 | 3 Stadium\r\n> Drive, Singapore 397630\r\n>\r\n>\r\n>\r\n> [image: SportSG_EmailSignature_Generic2019]\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> [image: SG United] <https://www.sgunited.gov.sg/>\r\n>\r\n>\r\n>\r\n> *CONFIDENTIALITY NOTICE:* This email may contain privileged and\r\n> confidential information. If you have been sent or have received this email\r\n> by mistake, please notify the sender and delete the email immediately. You\r\n> must not copy, use or disseminate the information contained in this email.\r\n> Nothing in this email is intended to nor shall it constitute any offer or\r\n> acceptance resulting in binding agreement, unless expressly stated\r\n> otherwise.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> *From:* Alex Loh <alexloh77@hotmail.com>\r\n> *Sent:* Wednesday, 3 March 2021 10:47 AM\r\n> *To:* SPORT swimsafer (SPORT) <SPORT_swimsafer@sport.gov.sg>\r\n> *Cc:* Simon TAN (SPORT) <Simon_TAN@sport.gov.sg>\r\n> *Subject:* Swimsafer Test Booking\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Hi,\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> I got problem with the test booking. I am unable to book swimsafer test on\r\n> 13 mar 2021 1515hrs at Jurong East Complex, I choose other dates and timing\r\n> also unable to book.\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n> Thanks!!!\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n>\r\n'

In [19]:
clean_text(s)

'warm phoebe xie product director iapps pte ltd fusionopolis way symbiosis # website wwwiappsasiacom facebook wwwfacebookcom iappsasia youtube wwwyoutubecom user iappsasia helpdesk pleasae provide us screenshot error shown also explain steps taken user hence causing user say unable book thanks warm phoebe xie product director iapps pte ltd fusionopolis way symbiosis # website wwwiappsasiacom facebook wwwfacebookcom iappsasia youtube wwwyoutubecom user iappsasia wrote able following issue earliest assessment instructor able book march april warmest yuan ting helpdesk could advise issue instructor unable book assessments within march april claims system allows book earliest able clarify reason warm farris checked coaches jurong east complex pm pm non booked swimsafer test month test booking made unable book test thanks checked coaches jurong east complex pm pm non booked swimsafer test month test booking made unable book test thanks checked coaches jurong east complex pm pm non booked sw

In [20]:
s.split("\r")

['---------- Forwarded message ---------',
 '\nFrom: Phoebe <phoebexie@iappsasia.com>',
 '\nDate: Fri, Mar 19, 2021 at 9:32 AM',
 '\nSubject: Fwd: FW: [CASE:217438] FW: Swimsafer Test Booking',
 '\nTo: <samuel@iappsasia.com>',
 '\n',
 '\n',
 '\n',
 '\nWarm Regards,',
 '\nPhoebe Xie',
 '\nProduct Director',
 '\nIAPPS Pte Ltd',
 '\n3 Fusionopolis Way, Symbiosis #13-25 S(138633)',
 '\n[O] 6463 1795   [F] 6778 5300',
 '\nWebsite: www.iappsasia.com',
 '\nFacebook: www.facebook.com/iappsasia',
 '\nYoutube: www.youtube.com/user/iAPPSasia',
 '\n',
 '\n',
 '\n',
 '\n',
 '\n---------- Forwarded message ---------',
 '\nFrom: Phoebe <phoebexie@iappsasia.com>',
 '\nDate: Fri, Mar 19, 2021 at 9:31 AM',
 '\nSubject: Re: FW: [CASE:217438] FW: Swimsafer Test Booking',
 '\nTo: Active SG Helpdesk <helpme@iappsasia.com>',
 '\nCc: <L2@iappsasia.com>',
 '\n',
 '\n',
 '\nHi Helpdesk',
 '\n',
 '\nPleasae provide us the screenshot or error message shown. Also, please',
 '\nexplain the steps taken by user and h

In [23]:
class issueType(enum.Enum):
    wallet = 0
    adminPanel = 1
    android = 2
    iOS = 3
    internet = 4
    receiptPrinter = 5
    goPanel = 6
    gymPanel = 7
    pos = 8
    iPad = 9
    nexus = 10
    wifi = 11
    web = 12
    merchant = 13
    turnstile = 14
    network = 15
    powerAdapter = 16
    scanner = 17
    switches = 18
    ocbcGateway = 19
    dbsGateway = 20
    serviceRequest = 21
    swimPanel = 22
    swimSafer = 23
    sep = 24
    booking = 25
    feedback = 26
    transaction = 27
    enquiries = 28
    membership = 29
    programme = 30
    publicProgramme = 31
    dataPatchReceipt = 32

In [24]:
print(issueType.iOS)

issueType.iOS


In [25]:
issueType.iOS.value

3

In [26]:
df

Unnamed: 0,content,issue,cleaned_content
0,Hi Erik and Kelvin\nPlease help check why pare...,Account Profile,erik kelvin check parental information missing...
1,Please help check why parental information is ...,Account Profile,check parental information missing month month...
2,"Hi All,\nKindly assist in checking if user hav...",ActiveSG Credits,
3,"Hi All,\nPlease assist.\nUser have a change in...",ActiveSG Credits,user change citizenship also update verified a...
4,"Hi iApps colleagues,\nplease find the feedback...",Booking,iapps colleagues find feedback writer lag expe...
5,"Hi Jun Jie,\n\nThanks for the fast reply.\nI f...",Booking,jie thanks fast reply wish point system traffi...
6,"Hi All,\nThis case needs investigation\n- Issu...",Competitions,case needs investigation issue member able see...
7,Hi Azimah\n\nUnder 11 Boys single will not sho...,Competitions,azimah boys single show reached maximum capaci...
8,Hi Helpdesk\nThere are 83 members in the attac...,Credit expiry request,helpdesk members attached sheet extend actives...
9,"Hi All,\nThis case requires Ontime creation.\n...",Credit expiry request,case requires ontime creation issue credit rei...


In [15]:
df = pd.read_csv('report.csv')
df.head()

Unnamed: 0,Case Number,Channel,Case Title *,Status,Subject *,Date Created,Case Description,Issue Type,Issue Sub Type
0,212998,Email,Unable to buy pass,Closed,iAPPS (General Enquiry),1/3/2021 0:13,Hi\r\n\r\nI would like to purchase swim pass. ...,Enquiries,Purchase pass
1,212999,Email,ActiveSG Contact Us,Closed,iAPPS (General Enquiry),1/3/2021 1:51,I can’t update my profile with mobile number 8...,Membership,Unable To Update
2,213004,Email,Badminton Booking,Closed,iAPPS (General Enquiry),1/3/2021 7:28,For last 2 days i had trouble booking for slot...,Booking,Booking Enquiry
3,213006,Email,Re: [CASE:210700] Re: ^_ISSUES^_ FOR SWIMSAFER...,Closed,iAPPS (General Enquiry),1/3/2021 8:07,"\nDear Yuan Ting,Great! Thxs for helping.\nChe...",SwimSafer,Wrong Stage
4,213007,Email,POSSIBLE TO ADD STUDENT FOR CREATED TEST,Closed,iAPPS (General Enquiry),1/3/2021 8:19,"\r\n\r\nDear Helpdesk,Missed out 1 student for...",SwimSafer,Others


In [4]:
df.isnull().sum()

Case Number          0
Channel              0
Case Title *         0
Status               0
Subject *            0
Date Created         0
Case Description     0
Issue Type          24
Issue Sub Type      25
dtype: int64

In [16]:
df.shape

(1435, 9)

In [17]:
df = df.dropna()

In [18]:
df.isnull().sum()

Case Number         0
Channel             0
Case Title *        0
Status              0
Subject *           0
Date Created        0
Case Description    0
Issue Type          0
Issue Sub Type      0
dtype: int64

In [19]:
df = df[['Case Description', 'Issue Type']]
df.to_csv('train.csv', index=False)

In [20]:
df.head()

Unnamed: 0,Case Description,Issue Type
0,Hi\r\n\r\nI would like to purchase swim pass. ...,Enquiries
1,I can’t update my profile with mobile number 8...,Membership
2,For last 2 days i had trouble booking for slot...,Booking
3,"\nDear Yuan Ting,Great! Thxs for helping.\nChe...",SwimSafer
4,"\r\n\r\nDear Helpdesk,Missed out 1 student for...",SwimSafer
