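# Example: From Pre-processing to Prediction

This notebook walks through the ticket-triage pipeline end to end: loading and cleaning the tickets, encoding their text with DistilBERT, and predicting each ticket's board, severity, and impact.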

In [1]:
### General Packages ###
import pandas as pd
import datetime
import altair as alt

### For Model Exporting ###
from joblib import dump, load

### Metrics for Evaluation ###
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix

### For Board Modelling ###
from sklearn.svm import SVC

### Ticket triage functions ###
# Add the project's helper folders to the module search path so the
# project-local modules imported below can be found.
# The entries are relative paths, so they resolve against the kernel's
# current working directory: start the notebook from the directory that
# contains `src/`.
import sys
sys.path.append("src/Auxiliary/")
sys.path.append("src/Cleaning/")
sys.path.append("src/Model/")
sys.path.append("src/Tokenizer/")

### For pre-processing ###
import ticket_cleaner
import bert_tokenizer

### For Board Modelling ###
import board

### For Severity Modelling ###
import severity

### For Impact Modelling ###
import impact

Compute engine used:  cuda:0


# Import Data

In [2]:
# Training tickets (with their impact / severity / board classifications)
# come from an Excel workbook; the test tickets come from a CSV file.
train_set = pd.read_excel(io="Data/Tickets with Classifications.xlsx")
test_set = pd.read_csv(filepath_or_buffer="Data/NewTestSet.csv")

# Show the first five training rows as this cell's output.
train_set.head(n=5)

Unnamed: 0,ticketNbr,company_name,contact_name,Summary,Initial_Description,SR_Impact_RecID,SR_Severity_RecID,SR_Board_RecID,Source,date_entered
0,1367437,Igloo Building Supplies Group,Steven Gravel,- Set up authenticator,"Hello,\n\nI have a new Iphone and I cannot fig...",1,3,41,Email Connector,2021-02-22 08:07:10
1,1367056,Group2 Architecture Interior Design Ltd.,Ron Murdoch,Ron Murdoch - bluebeam update failed,Can you tell me what your computer name is and...,1,2,41,Email Connector,2021-02-21 10:36:46
2,1366390,Calmont Group,Brad Wearmouth,Brad Wearmouth - Email hacked,"Hi,\n\nI believe my email has been compromised...",1,3,41,Email Connector,2021-02-19 21:34:56
3,1366274,E4C,CÃ©line Scott,CÃ©line Scott - Adobe still not fully functioning,### Summary of Issue\r\nAdobe still not fully ...,1,2,41,DeskDirector,2021-02-19 17:37:03
4,1366248,Durocher Simpson Koehli Erler LLP,Linette Rasmussen,Robert Simpson - Cannot log into cloud,"Good afternoon,\n\nI am starting a ticket for ...",1,3,41,Email Connector,2021-02-19 16:35:57


# Pre-processing

In [3]:
def clean_ticket_frame(tickets):
    """Run ``ticket_cleaner.clean_tickets`` on one raw ticket table.

    The train and test sets are cleaned with exactly the same column
    mapping, so the mapping lives here once instead of being written out
    twice.

    Parameters
    ----------
    tickets : pandas.DataFrame
        Raw tickets. Must expose the columns ``ticketNbr``,
        ``contact_name``, ``company_name``, ``Summary``,
        ``Initial_Description``, ``SR_Impact_RecID``,
        ``SR_Severity_RecID``, ``SR_Board_RecID``, ``Source`` and
        ``date_entered``.

    Returns
    -------
    The object returned by ``ticket_cleaner.clean_tickets`` (the rest of
    the notebook uses it as a DataFrame with a ``combined_text`` column
    and ``Impact`` / ``Severity`` / ``Board`` columns).
    """
    return ticket_cleaner.clean_tickets(
        ticketNbr = tickets.ticketNbr,
        contact_name = tickets.contact_name,
        company_name = tickets.company_name,
        Summary = tickets.Summary,
        Initial_Description = tickets.Initial_Description,
        Impact = tickets.SR_Impact_RecID,
        Severity = tickets.SR_Severity_RecID,
        Board = tickets.SR_Board_RecID,
        Source = tickets.Source,
        date_entered = tickets.date_entered,
    )


X_train = clean_ticket_frame(train_set)
X_test = clean_ticket_frame(test_set)

In [4]:
# Preview the first five cleaned training tickets.
X_train.head(n=5)

Unnamed: 0,combined_text,Impact,Severity,Board,Source,email_connector,deskdirector,email,renewal,escalation,brd36,brd41,brd43,Low_vs_MedHigh,LowMed_vs_High
0,Set up authenticator. i have a new iphone and ...,0,2,41,Email Connector,1,0,0,0,0,0,1,0,1,0
1,bluebeam update failed. can you tell me what y...,0,1,41,Email Connector,1,0,0,0,0,0,1,0,1,1
2,Email hacked. i believe my email has been comp...,0,2,41,Email Connector,1,0,0,0,0,0,1,0,1,0
3,Adobe still not fully functioning. adobe still...,0,1,41,DeskDirector,0,1,0,0,0,0,1,0,1,1
4,Cannot log into cloud. i am starting a ticket ...,0,2,41,Email Connector,1,0,0,0,0,0,1,0,1,0


# Board Prediction

In [5]:
# TODO: replace DistilBERT with RoBERTa.
from bert_tokenizer import BERT_Tokenizer
import transformers as ppb
from transformers import DistilBertModel, DistilBertTokenizer

# Maximum token length handed to the tokenizer helper for every ticket.
max_length = 100

# Pre-trained DistilBERT checkpoint and the classes used to load it.
pretrained_weights = 'distilbert-base-uncased'
model_class = ppb.DistilBertModel
tokenizer_class = ppb.DistilBertTokenizer

tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
model = model_class.from_pretrained(pretrained_weights)

# Encode the cleaned ticket text of each set, training set first. Each call
# prints its total run time; the results are used as the feature matrices
# for the board model in the next cell.
X_text_train = BERT_Tokenizer(
    model = model,
    tokenizer = tokenizer,
    text = X_train.combined_text,
    max_len = max_length,
)
X_text_test = BERT_Tokenizer(
    model = model,
    tokenizer = tokenizer,
    text = X_test.combined_text,
    max_len = max_length,
)

Total Time (mins): 0:01:53
Total Time (mins): 0:03:56


In [6]:
# Model the board: fit a linear SVM on the encoded ticket text, then predict
# the board for both the training and the test tickets.
Y_train_board = X_train.Board
Y_test_board = X_test.Board

# `gamma` is only used by the 'rbf', 'poly' and 'sigmoid' kernels, so it is
# not passed here: with kernel='linear' it had no effect on the fitted model.
# class_weight='balanced' weights each class inversely to its frequency.
board_svm = SVC(C=1, kernel='linear', class_weight='balanced')
board_svm.fit(X_text_train, Y_train_board)

# Note: board_train holds in-sample predictions (same rows the model was
# fitted on); both prediction vectors are reused as features further down.
board_train = board_svm.predict(X_text_train)
board_test = board_svm.predict(X_text_test)

# Confusion matrices: rows are the true boards, columns the predicted boards.
print("Training: \n",confusion_matrix(Y_train_board,board_train))
print("Testing: \n",confusion_matrix(Y_test_board,board_test))

Training: 
 [[430   1   2]
 [  5 761  47]
 [ 10  43 698]]
Testing: 
 [[ 377   15   64]
 [  50 1367  418]
 [ 176  249 1284]]


# Severity Prediction

In [7]:
import severity

# Step 1: attach the board model's predictions to the cleaned tickets, so the
# text and one-hot-encoded Source can be combined with the predicted board.
X_features_train = severity.add_board_predictions(
    X_train,
    board_predict = board_train,
)
X_features_test = severity.add_board_predictions(
    X_test,
    board_predict = board_test,
)

# Step 2: the targets for this section are the Severity labels.
Y_severity_train, Y_severity_test = X_train.Severity, X_test.Severity

# Step 3: build the model inputs. As the printed progress shows, each call
# loads a pre-trained BERT model and tokenizer, tokenizes the text (slow),
# and then combines the result with Source and Board.
X_text_src_board_train = severity.format_inputs(
    X_features_train,
    max_len = 100,
)
X_text_src_board_test = severity.format_inputs(
    X_features_test,
    max_len = 100,
)

--Importing pre-trained BERT model and tokenizer (1/3)--
--Tokenizing Text. May take a while. (2/3)--
Total Time (mins): 0:01:53
--Combining Source and Board (3/3)--
--Importing pre-trained BERT model and tokenizer (1/3)--
--Tokenizing Text. May take a while. (2/3)--
Total Time (mins): 0:03:49
--Combining Source and Board (3/3)--


In [8]:
#Train Model
# NOTE(review): the recorded output of this cell is
#   AttributeError: module 'severity' has no attribute 'train_combined'
# so this call did not succeed in the saved run and nothing below it executed.
# Confirm the name of the training function exposed by the `severity` module
# (or whether a stale copy of the module was imported) before relying on this cell.
# Presumably the training call writes "svm_severity_combined.joblib" into
# export_path, since that is the file the prediction call loads — TODO confirm.
severity.train_combined(X_text_src_board_train,Y_severity_train, save_model = "Y", export_path = "./", verbose=2)

#Use Model to Predict
# Loads the saved severity model from import_path and predicts on the test inputs.
pred_probs = severity.predict_combined(X_predict = X_text_src_board_test, import_path = "./svm_severity_combined.joblib", verbose = 2)

# Compare the true severity labels with the `Predict` column of the result.
print(confusion_matrix(Y_severity_test,pred_probs.Predict))

AttributeError: module 'severity' has no attribute 'train_combined'

In [None]:
# Put the severity predictions next to the ticket text and the true label.
# NOTE(review): presumably pred_probs is a DataFrame; `assign` aligns the
# added Series on the index, so this assumes pred_probs shares X_test's
# index — confirm.
test_results = pred_probs.copy().assign(
    text = X_test.combined_text,
    Actual = X_test.Severity,
)
test_results

# Impact Prediction

In [None]:
import impact

# Step 1: attach the board model's predictions to the cleaned tickets. The
# feature helpers used in this section come from the `severity` module.
# NOTE(review): these are the same calls, with the same arguments, as in the
# Severity section, so the slow encoding step is repeated here — consider
# reusing the earlier results if the helpers are deterministic.
X_features_train = severity.add_board_predictions(
    X_train,
    board_predict = board_train,
)
X_features_test = severity.add_board_predictions(
    X_test,
    board_predict = board_test,
)

# Step 2: the targets for this section are the Impact labels.
Y_impact_train, Y_impact_test = X_train.Impact, X_test.Impact

# Step 3: build the model inputs from the combined features.
X_text_src_board_train = severity.format_inputs(
    X_features_train,
    max_len = 100,
)
X_text_src_board_test = severity.format_inputs(
    X_features_test,
    max_len = 100,
)

In [None]:
#Train Model
# NOTE(review): the severity training call passes save_model = "Y"; this call
# does not. Confirm that impact.train_combined saves the model by default,
# otherwise the prediction step below has no fresh model file to load.
impact.train_combined(X_text_src_board_train,Y_impact_train, export_path = "./", verbose=2)

#Use Model to Predict
# Load the impact model, not the severity one ("svm_severity_combined.joblib"):
# the predictions are scored against the impact labels below.
# NOTE(review): the file name assumes impact.train_combined mirrors the
# severity naming and writes "svm_impact_combined.joblib" into export_path —
# confirm against the `impact` module.
pred_probs = impact.predict_combined(X_predict = X_text_src_board_test, import_path = "./svm_impact_combined.joblib", verbose = 2)

# Compare the true impact labels with the `Predict` column of the result.
print(confusion_matrix(Y_impact_test,pred_probs.Predict))

In [None]:
# Put the impact predictions next to the ticket text and the true label.
# NOTE(review): presumably pred_probs is a DataFrame; `assign` aligns the
# added Series on the index, so this assumes pred_probs shares X_test's
# index — confirm.
test_results = pred_probs.copy().assign(
    text = X_test.combined_text,
    Actual = X_test.Impact,
)
test_results