In [1]:
import io, os, sys, types
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell

def find_notebook(fullname, path=None):
    """Locate the ``.ipynb`` file backing a (possibly dotted) module name.

    Only the last component of ``fullname`` is used as the file stem, so
    "foo.bar" is looked up as "bar.ipynb" inside every directory of ``path``
    (i.e. "foo/bar.ipynb" when ``path`` is the search path of package ``foo``).
    If that file does not exist, the stem is retried with underscores turned
    into spaces so that ``import Foo_Bar`` can find "Foo Bar.ipynb".

    Args:
        fullname: Fully qualified module name, e.g. "pkg.Notebook_Name".
        path: Optional iterable of directories to search. When it is empty or
            ``None`` the current working directory is searched.

    Returns:
        The path of the first matching notebook file, or ``None`` when no
        directory contains one.
    """
    stem = fullname.rsplit('.', 1)[-1]
    # Candidate file names in priority order: the exact name first, then the
    # "underscores as spaces" spelling.
    candidates = (stem + ".ipynb", stem.replace("_", " ") + ".ipynb")
    for directory in path or ['']:
        for filename in candidates:
            # Only the file name is rewritten; the directory is left untouched
            # so that folders whose names contain underscores still work.
            nb_path = os.path.join(directory, filename)
            if os.path.isfile(nb_path):
                return nb_path
    return None
        
class NotebookLoader(object):
    """Module loader that executes a Jupyter notebook's code cells as a module.

    ``shell`` holds the object returned by ``InteractiveShell.instance()`` and
    is used to transform cell source into executable Python; ``path`` is the
    list of directories handed to ``find_notebook`` when resolving a name.
    """
    def __init__(self, path=None):
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """Import the notebook named ``fullname`` and return it as a module.

        The notebook is read as nbformat version 4, a fresh module object is
        registered in ``sys.modules`` and every code cell is executed in that
        module's namespace, in notebook order.

        Args:
            fullname: Fully qualified name of the module being imported.

        Returns:
            The populated module object.

        Raises:
            ImportError: if no notebook file can be found for ``fullname``.
            Exception: whatever reading or executing the notebook raises. In
                that case ``sys.modules[fullname]`` is put back to the state
                it had before the call, so no half-initialised module is left
                behind.
        """
        path = find_notebook(fullname, self.path)
        if not path:
            raise ImportError("No notebook found for module %r" % fullname,
                              name=fullname)

        print ("importing Jupyter notebook from %s" % path)

        # load the notebook object
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # create the module and add it to sys.modules
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython

        # Remember what was registered under this name so a failed load can
        # be undone; the sentinel distinguishes "absent" from any real value.
        missing = object()
        previous = sys.modules.get(fullname, missing)
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # A loader must not leave a partially executed module importable.
            if previous is missing:
                sys.modules.pop(fullname, None)
            else:
                sys.modules[fullname] = previous
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod
    
class NotebookFinder(object):
    """Meta-path finder that resolves module names to Jupyter notebooks.

    One ``NotebookLoader`` is created per distinct search path and cached in
    ``self.loaders`` so repeated lookups reuse the same loader.
    """
    def __init__(self):
        self.loaders = {}

    def _loader_for(self, path):
        """Return the cached loader for ``path``, creating it on first use."""
        # lists aren't hashable, so a non-empty path is flattened into a
        # string key; an empty or missing path shares the ``None`` key.
        key = os.path.sep.join(path) if path else None
        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_module(self, fullname, path=None):
        """Legacy finder hook: return a loader for ``fullname`` or ``None``.

        Args:
            fullname: Fully qualified module name being imported.
            path: Optional list of directories to search.

        Returns:
            A ``NotebookLoader`` when a matching notebook exists, else ``None``.
        """
        if not find_notebook(fullname, path):
            return None
        return self._loader_for(path)

    def find_spec(self, fullname, path=None, target=None):
        """Spec-based finder hook: return a module spec or ``None``.

        Newer Python versions (3.12+) no longer call ``find_module`` on
        meta-path finders, so without this method the finder is ignored
        there. The spec carries the same cached loader ``find_module``
        returns, and the notebook path as its ``origin``.

        Args:
            fullname: Fully qualified module name being imported.
            path: Optional list of directories to search.
            target: Unused; accepted for compatibility with the hook signature.

        Returns:
            A ``ModuleSpec`` when a matching notebook exists, else ``None``.
        """
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None
        # Imported here so the block does not depend on a file-level import.
        from importlib.util import spec_from_loader
        return spec_from_loader(fullname, self._loader_for(path), origin=nb_path)
    
# Register the notebook finder. Finders left over from an earlier run of this
# cell are dropped first. They are matched by module and class name because
# re-running the cell rebinds the NotebookFinder class, which would defeat an
# isinstance check. Repeated execution therefore neither stacks duplicates nor
# keeps a stale definition ahead of the new one.
sys.meta_path[:] = [
    finder for finder in sys.meta_path
    if (type(finder).__module__, type(finder).__name__)
    != (NotebookFinder.__module__, NotebookFinder.__name__)
]
sys.meta_path.append(NotebookFinder())

In [2]:
!pip install gensim
!pip3 install konlpy
!pip install --upgrade jtypes.jpype
!pip3 install JPype1-py3
!pip install xlrd
!pip install pydblite
!pip install psutil
!pip install xlsxwriter

Requirement already up-to-date: jtypes.jpype in /usr/local/lib/python3.6/site-packages
Requirement already up-to-date: setuptools>=30.4.0 in /usr/local/lib/python3.6/site-packages (from jtypes.jpype)


In [2]:
import sys
import time

import numpy as np
import torch
import torch.nn as nn
from torch import optim
from torch.autograd import Variable

sys.path.append("./Preprocessor")
sys.path.append("./Classifier")
sys.path.append('./Preprocessor/soy')
from Classifier import classifierModule
from Preprocessor import preprocessorModule
from Preprocessor import format_module

In [11]:
def spamFilterModule(input_path, validation_path = None, sensitivity = 0.5, run_mode = 'default'):
    """Run the spam filter in inference mode or in training mode.

    Without ``validation_path`` the file at ``input_path`` is preprocessed as
    a "test" set and handed to ``classifierModule.inference``; the elapsed
    wall-clock time is printed afterwards. With ``validation_path`` both files
    are preprocessed (as "train" and "validation") and handed to
    ``classifierModule.train_net``.

    Args:
        input_path: Path of the file to run inference on, or of the training
            file when ``validation_path`` is given.
        validation_path: Path of the validation file; ``None`` (the default)
            selects inference mode.
        sensitivity: Passed through unchanged to the classifier.
        run_mode: Passed through unchanged to the classifier.

    Returns:
        None.
    """
    if validation_path is None:
        started = time.time()
        print("----------------------preprocessing test set------------------------")
        formatted_review_list = preprocessorModule.preprocessModule(input_path, classifierModule.reviewDB, "test")
        print("\n------------------------------inferencing---------------------------------")
        classifierModule.inference(formatted_review_list, sensitivity, run_mode)
        elapsed = time.time() - started
        review_count = len(formatted_review_list)
        if review_count:
            print("elapsed time: {:.6f}s ({:.6f}s per review)" .format(elapsed, elapsed / review_count))
        else:
            # No per-review figure exists for an empty input; avoid dividing by zero.
            print("elapsed time: {:.6f}s (no reviews processed)".format(elapsed))
        return

    print("----------------------preprocessing training set------------------------")
    formatted_review_list_for_training = preprocessorModule.preprocessModule(input_path, classifierModule.reviewDB, "train")
    print("\n---------------------preprocessing validation set------------------------")
    formatted_review_list_for_validating = preprocessorModule.preprocessModule(validation_path, classifierModule.reviewDB, "validation")
    print("\n------------------------------training---------------------------------")
    classifierModule.train_net(formatted_review_list_for_training, formatted_review_list_for_validating, sensitivity, run_mode)
        

In [13]:
# Alternative invocation with separate training and validation workbooks
# (currently disabled):
#spamFilterModule("./Preprocessor/excel/trainset_spam_ham_1to1.xlsx", "./Preprocessor/excel/Commonreviews_snuproject_validation.xlsx")
# Training mode (a validation path is supplied), default sensitivity and run mode.
# NOTE(review): the same workbook is passed as both the training and the
# validation input, so the validation metrics are measured on the training
# data -- confirm this is intentional for this experiment.
spamFilterModule("./Preprocessor/excel/unrelated_context_experiment.xlsx", "./Preprocessor/excel/unrelated_context_experiment.xlsx")

----------------------preprocessing training set------------------------
processing 0 th unit...
adding review to DB...
=> Postag initiated
postagging [komoran] 0 ith unit...
processing word embedding...
processing formatted review...

---------------------preprocessing validation set------------------------
processing 0 th unit...
adding review to DB...
=> Postag initiated
postagging [komoran] 0 ith unit...
processing word embedding...
processing formatted review...

------------------------------training---------------------------------
epoch  0: 1.4232208728790283 s
--------- train_acc 0.945000 | ham_acc 0.970297 | spam_acc 0.919192 | f1_score 0.943005 | loss.data 0.370512
---- validation_acc 0.970000 | ham_acc 0.990099 | spam_acc 0.949495 | f1_score 0.969072 | loss.data 0.354346
./Classifier/models/final_39.mdl
epoch  1: 1.4228863716125488 s
--------- train_acc 0.960000 | ham_acc 0.990099 | spam_acc 0.929293 | f1_score 0.958333 | loss.data 0.363517
---- validation_acc 0.955000 | ha

KeyboardInterrupt: 