### Module tự động phân loại tin nhắn 

In [1]:
import sys
import io
from io import BytesIO
import pandas as pd
import warnings
import time

# Silence all warnings, but only when no warning options were supplied
# explicitly (sys.warnoptions is empty unless e.g. -W was passed).
if not sys.warnoptions:
    warnings.simplefilter("ignore")
    
# UI packages (notebook display helpers and widgets)
from IPython.display import display, clear_output
from IPython.display import HTML
import ipywidgets as widgets
from ipywidgets import Button, Layout
from ipyupload import FileUpload

import multi_label_classification_library as lib

In [2]:
# Upload control for the data file.
#   accept       - file filter in HTML <input accept> syntax, e.g. '.txt',
#                  '.pdf', 'image/*', 'image/*,.pdf'
#                  (https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input#attr-accept);
#                  here only .csv files are offered.
#   multiple     - True to allow several files per upload, False for one.
#   disabled     - True to disable the button, False to enable it.
#   style_button - CSS passed through to the button (in fact a label element),
#                  e.g. 'color: darkblue; background-color: lightsalmon; width: 180px;'.
input_browser = FileUpload(accept='.csv', multiple=False, disabled=False, style_button='')

# Radio group for choosing the multi-label classification algorithm.
# The first option is preselected; the Test handler compares
# `algo_choice.value` against these exact label strings.
algo_choice = widgets.RadioButtons(
    options=[
        'Binary Relevance với Gaussian Naïve Bayes',
        'Binary Relevance với SVC',
        'Binary Relevance với Logistic Regression',
        'Classifier Chains với Gaussian Naïve Bayes',
        'Multi-label K-Nearest Neighbours (MLkNN)',
    ],
    value='Binary Relevance với Gaussian Naïve Bayes',
    description='',
    disabled=False,
)
# Output areas: `out` sits on the first tab (messages and data preview),
# `out1` holds the prediction table, `out2` the quality scores and report.
out, out1, out2 = widgets.Output(), widgets.Output(), widgets.Output()

# Interactive controls.
train_button = widgets.Button(description='Train')
test_button = widgets.Button(description='Test')
input_text = widgets.Textarea(
    value='',
    placeholder='Type something',
    description='String:',
    disabled=False,
)

# Labelled rows built from the controls.
choices = widgets.HBox([widgets.Label(value="Lựa chọn thuật toán:"), algo_choice])
buttons = widgets.HBox([train_button, test_button])
test_frame = widgets.HBox([widgets.Label(value="Input:"), input_text])
result_frame = widgets.VBox([widgets.Label(value="Kết quả phân lớp:"), out1])

# Page containers. `box11` (the free-text input row) is created but is not
# one of the tab pages assigned below.
box = widgets.VBox([input_browser, choices, buttons, out])
box11 = widgets.VBox([test_frame])
box1 = widgets.VBox([result_frame])
box2 = widgets.VBox([widgets.Label(value="Chất lượng phân lớp:"), out2])

# Three-page tab container: setup, classification results, quality metrics.
tab = widgets.Tab()
tab.children = [box, box1, box2]
tab.set_title(0, 'Phân lớp đa nhãn')
tab.set_title(1, 'Kết quả phân lớp')
tab.set_title(2, 'Chất lượng phân lớp')

# Shared notebook state, filled in by the Train handler and read by the Test
# handler. Everything starts out unset.
data_raw = data = None            # uploaded CSV as read by pandas / its preprocessed form
categories = None                 # column names of data_raw from the third column onward
vectorizer = None                 # first value returned by lib.vectorit
x_train = y_train = None          # training split returned by lib.vectorit
x_test = y_test = None            # test split returned by lib.vectorit
train_text = test_text = None     # last two values returned by lib.vectorit
walltime = 0                      # initialised here; not assigned by the handlers in this cell

def train_on_clicked(_):
    """Handle a click on the Train button.

    Clears the `out` area, then either reports that no file was uploaded or
    loads the first uploaded file as UTF-8 CSV, shows its first rows, and
    stores the preprocessing/vectorisation results in the module-level
    state (`data_raw`, `data`, `categories`, `vectorizer`, `x_train`,
    `y_train`, `x_test`, `y_test`, `train_text`, `test_text`).

    The single positional argument (the clicked button) is ignored.
    """
    global data_raw, data, categories
    global vectorizer, x_train, y_train, x_test, y_test, train_text, test_text

    with out:
        clear_output()

        uploads = input_browser.value
        if not uploads:
            display(HTML("<h2><font color=red>Bạn phải chọn file dữ liệu!</h2>"))
            return

        # Only the first uploaded entry is used.
        first_upload = next(iter(uploads.values()))
        data_raw = pd.read_csv(io.BytesIO(first_upload['content']), encoding='utf8')

        # Label columns are everything after the first two columns
        # (presumably an id and the message text - confirm against the CSV schema).
        categories = list(data_raw.columns.values)[2:]
        display(data_raw.head())

        data = lib.data_preprocessing(data_raw)
        (vectorizer, x_train, y_train, x_test, y_test,
         train_text, test_text) = lib.vectorit(data)

# Maps each label offered by `algo_choice` to the name of the `lib` function
# that fits the model on the training split and predicts on the test split.
# Names are resolved with getattr at click time, so only the selected
# algorithm's function has to exist.
_ALGORITHM_RUNNERS = {
    'Binary Relevance với Gaussian Naïve Bayes': 'BinaryRelevance_GNB',
    'Binary Relevance với SVC': 'BinaryRelevance_SVC',
    'Binary Relevance với Logistic Regression': 'BinaryRelevance_LR',
    'Classifier Chains với Gaussian Naïve Bayes': 'ClassierChains',
    'Multi-label K-Nearest Neighbours (MLkNN)': 'MlkNN',
}


def test_on_clicked(_):
    """Handle a click on the Test button.

    Runs the algorithm selected in `algo_choice` on the data prepared by the
    Train handler, shows the prediction table in `out1`, switches to the
    results tab, and shows the scores and report in `out2`.

    If no file has been uploaded, or Train has not produced any data yet, a
    message is shown in `out` instead and `out2` is left empty.

    The single positional argument (the clicked button) is ignored. The
    function only reads the module-level state, so no `global` declarations
    are needed.
    """
    scores = ''
    report = ''

    # Work out whether the preconditions for testing are met.
    problem = None
    if not input_browser.value:
        problem = "<h2><font color=red>Bạn phải chọn file dữ liệu!</h2>"
    elif vectorizer is None or x_train is None:
        # Train has not run yet: every input of the classifiers is still None.
        problem = "<h2><font color=red>Bạn phải nhấn Train trước khi Test!</font></h2>"

    if problem is not None:
        with out:
            clear_output()
            display(HTML(problem))
    else:
        with out1:
            # Drop the previous table so it matches the scores shown in out2,
            # which are always those of the latest run.
            clear_output()

            runner_name = _ALGORITHM_RUNNERS.get(algo_choice.value)
            if runner_name is not None:
                run_algorithm = getattr(lib, runner_name)

                start = time.time()
                classifier, predictions = run_algorithm(x_train, y_train, x_test)
                walltimes = round(time.time() - start, 0)

                tbl, scores, report = lib.printpredicttable(
                    vectorizer, classifier, predictions, test_text,
                    data_raw, y_test, categories, walltimes, False)
                display(HTML(tbl))

            tab.selected_index = 1

    with out2:
        # Show the quality metrics (empty when nothing was run).
        clear_output()
        display(HTML(scores))
        print(report)

# Register the click handlers on the two buttons.
train_button.on_click(train_on_clicked)
test_button.on_click(test_on_clicked)

# Last expression of the cell: its value (the tabbed UI wrapped in a VBox)
# is what the notebook displays as the cell output.
widgets.VBox([tab])

VBox(children=(Tab(children=(VBox(children=(FileUpload(accept='.csv'), HBox(children=(Label(value='Lựa chọn th…